// core/stdarch/crates/core_arch/src/x86/avx512bw.rs

1use crate::{
2    core_arch::{simd::*, x86::*},
3    intrinsics::simd::*,
4    ptr,
5};
6
7#[cfg(test)]
8use stdarch_test::assert_instr;
9
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        // Lane-wise |x|: strictly-positive lanes pass through, all others are
        // negated. `simd_neg` wraps in two's complement, so abs(i16::MIN)
        // stays i16::MIN — the same result VPABSW produces.
        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}
24
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        // Merge: lane i comes from `abs` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
    }
}
38
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        // Zero-merge: lane i is `abs` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
    }
}
52
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (requires AVX512VL): reuses the AVX2 abs intrinsic.
        let abs = _mm256_abs_epi16(a).as_i16x16();
        // Merge: lane i comes from `abs` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
    }
}
66
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        // Zero-merge: lane i is `abs` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
    }
}
80
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (requires AVX512VL): reuses the SSSE3 abs intrinsic.
        let abs = _mm_abs_epi16(a).as_i16x8();
        // Merge: lane i comes from `abs` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
    }
}
94
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        // Zero-merge: lane i is `abs` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
    }
}
108
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        // Lane-wise |x|: strictly-positive lanes pass through, all others are
        // negated. `simd_neg` wraps in two's complement, so abs(i8::MIN)
        // stays i8::MIN — the same result VPABSB produces.
        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}
123
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        // Merge: lane i comes from `abs` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
    }
}
137
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        // Zero-merge: lane i is `abs` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
    }
}
151
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (requires AVX512VL): reuses the AVX2 abs intrinsic.
        let abs = _mm256_abs_epi8(a).as_i8x32();
        // Merge: lane i comes from `abs` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
    }
}
165
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        // Zero-merge: lane i is `abs` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
    }
}
179
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (requires AVX512VL): reuses the SSSE3 abs intrinsic.
        let abs = _mm_abs_epi8(a).as_i8x16();
        // Merge: lane i comes from `abs` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
    }
}
193
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        // Zero-merge: lane i is `abs` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
    }
}
207
/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise wrapping addition: overflow wraps modulo 2^16, as VPADDW does.
    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
}
218
/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}
232
/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}
246
/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (requires AVX512VL): reuses the AVX2 add intrinsic.
        let add = _mm256_add_epi16(a, b).as_i16x16();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}
260
/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}
274
/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (requires AVX512VL): reuses the SSE2 add intrinsic.
        let add = _mm_add_epi16(a, b).as_i16x8();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}
288
/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}
302
/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise wrapping addition: overflow wraps modulo 2^8, as VPADDB does.
    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
}
313
/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}
327
/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}
341
/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (requires AVX512VL): reuses the AVX2 add intrinsic.
        let add = _mm256_add_epi8(a, b).as_i8x32();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}
355
/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}
369
/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (requires AVX512VL): reuses the SSE2 add intrinsic.
        let add = _mm_add_epi8(a, b).as_i8x16();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}
383
/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}
397
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise saturating addition: sums clamp to u16::MAX instead of wrapping.
    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
}
408
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
    }
}
422
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
    }
}
436
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (requires AVX512VL): reuses the AVX2 saturating add.
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
    }
}
450
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
    }
}
464
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (requires AVX512VL): reuses the SSE2 saturating add.
        let add = _mm_adds_epu16(a, b).as_u16x8();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
    }
}
478
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
    }
}
492
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise saturating addition: sums clamp to u8::MAX instead of wrapping.
    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
}
503
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
    }
}
517
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
    }
}
531
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (requires AVX512VL): reuses the AVX2 saturating add.
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
    }
}
545
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
    }
}
559
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (requires AVX512VL): reuses the SSE2 saturating add.
        let add = _mm_adds_epu8(a, b).as_u8x16();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
    }
}
573
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
    }
}
587
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise saturating addition: sums clamp to [i16::MIN, i16::MAX]
    // instead of wrapping.
    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
}
598
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        // Merge: lane i comes from `add` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}
612
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        // Zero-merge: lane i is `add` when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}
626
627/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
628///
629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
630#[inline]
631#[target_feature(enable = "avx512bw,avx512vl")]
632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
633#[cfg_attr(test, assert_instr(vpaddsw))]
634pub fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
635    unsafe {
636        let add = _mm256_adds_epi16(a, b).as_i16x16();
637        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
638    }
639}
640
641/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
642///
643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
644#[inline]
645#[target_feature(enable = "avx512bw,avx512vl")]
646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
647#[cfg_attr(test, assert_instr(vpaddsw))]
648pub fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
649    unsafe {
650        let add = _mm256_adds_epi16(a, b).as_i16x16();
651        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
652    }
653}
654
655/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
656///
657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
658#[inline]
659#[target_feature(enable = "avx512bw,avx512vl")]
660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
661#[cfg_attr(test, assert_instr(vpaddsw))]
662pub fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
663    unsafe {
664        let add = _mm_adds_epi16(a, b).as_i16x8();
665        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
666    }
667}
668
669/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
670///
671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
672#[inline]
673#[target_feature(enable = "avx512bw,avx512vl")]
674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
675#[cfg_attr(test, assert_instr(vpaddsw))]
676pub fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
677    unsafe {
678        let add = _mm_adds_epi16(a, b).as_i16x8();
679        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
680    }
681}
682
683/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
684///
685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
686#[inline]
687#[target_feature(enable = "avx512bw")]
688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
689#[cfg_attr(test, assert_instr(vpaddsb))]
690pub fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
691    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
692}
693
694/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
695///
696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
697#[inline]
698#[target_feature(enable = "avx512bw")]
699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
700#[cfg_attr(test, assert_instr(vpaddsb))]
701pub fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
702    unsafe {
703        let add = _mm512_adds_epi8(a, b).as_i8x64();
704        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
705    }
706}
707
708/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
709///
710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
711#[inline]
712#[target_feature(enable = "avx512bw")]
713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
714#[cfg_attr(test, assert_instr(vpaddsb))]
715pub fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
716    unsafe {
717        let add = _mm512_adds_epi8(a, b).as_i8x64();
718        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
719    }
720}
721
722/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
723///
724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
725#[inline]
726#[target_feature(enable = "avx512bw,avx512vl")]
727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
728#[cfg_attr(test, assert_instr(vpaddsb))]
729pub fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
730    unsafe {
731        let add = _mm256_adds_epi8(a, b).as_i8x32();
732        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
733    }
734}
735
736/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
737///
738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
739#[inline]
740#[target_feature(enable = "avx512bw,avx512vl")]
741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
742#[cfg_attr(test, assert_instr(vpaddsb))]
743pub fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
744    unsafe {
745        let add = _mm256_adds_epi8(a, b).as_i8x32();
746        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
747    }
748}
749
750/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
751///
752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
753#[inline]
754#[target_feature(enable = "avx512bw,avx512vl")]
755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
756#[cfg_attr(test, assert_instr(vpaddsb))]
757pub fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
758    unsafe {
759        let add = _mm_adds_epi8(a, b).as_i8x16();
760        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
761    }
762}
763
764/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
765///
766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
767#[inline]
768#[target_feature(enable = "avx512bw,avx512vl")]
769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
770#[cfg_attr(test, assert_instr(vpaddsb))]
771pub fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
772    unsafe {
773        let add = _mm_adds_epi8(a, b).as_i8x16();
774        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
775    }
776}
777
778/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
779///
780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
781#[inline]
782#[target_feature(enable = "avx512bw")]
783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
784#[cfg_attr(test, assert_instr(vpsubw))]
785pub fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
786    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
787}
788
789/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
790///
791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
792#[inline]
793#[target_feature(enable = "avx512bw")]
794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
795#[cfg_attr(test, assert_instr(vpsubw))]
796pub fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
797    unsafe {
798        let sub = _mm512_sub_epi16(a, b).as_i16x32();
799        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
800    }
801}
802
803/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
804///
805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
806#[inline]
807#[target_feature(enable = "avx512bw")]
808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
809#[cfg_attr(test, assert_instr(vpsubw))]
810pub fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
811    unsafe {
812        let sub = _mm512_sub_epi16(a, b).as_i16x32();
813        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
814    }
815}
816
817/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
818///
819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
820#[inline]
821#[target_feature(enable = "avx512bw,avx512vl")]
822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
823#[cfg_attr(test, assert_instr(vpsubw))]
824pub fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
825    unsafe {
826        let sub = _mm256_sub_epi16(a, b).as_i16x16();
827        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
828    }
829}
830
831/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
832///
833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
834#[inline]
835#[target_feature(enable = "avx512bw,avx512vl")]
836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
837#[cfg_attr(test, assert_instr(vpsubw))]
838pub fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
839    unsafe {
840        let sub = _mm256_sub_epi16(a, b).as_i16x16();
841        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
842    }
843}
844
845/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
846///
847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
848#[inline]
849#[target_feature(enable = "avx512bw,avx512vl")]
850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
851#[cfg_attr(test, assert_instr(vpsubw))]
852pub fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
853    unsafe {
854        let sub = _mm_sub_epi16(a, b).as_i16x8();
855        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
856    }
857}
858
859/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
860///
861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
862#[inline]
863#[target_feature(enable = "avx512bw,avx512vl")]
864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
865#[cfg_attr(test, assert_instr(vpsubw))]
866pub fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
867    unsafe {
868        let sub = _mm_sub_epi16(a, b).as_i16x8();
869        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
870    }
871}
872
873/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
874///
875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
876#[inline]
877#[target_feature(enable = "avx512bw")]
878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
879#[cfg_attr(test, assert_instr(vpsubb))]
880pub fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
881    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
882}
883
884/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
885///
886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
887#[inline]
888#[target_feature(enable = "avx512bw")]
889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
890#[cfg_attr(test, assert_instr(vpsubb))]
891pub fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
892    unsafe {
893        let sub = _mm512_sub_epi8(a, b).as_i8x64();
894        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
895    }
896}
897
898/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
899///
900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
901#[inline]
902#[target_feature(enable = "avx512bw")]
903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
904#[cfg_attr(test, assert_instr(vpsubb))]
905pub fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
906    unsafe {
907        let sub = _mm512_sub_epi8(a, b).as_i8x64();
908        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
909    }
910}
911
912/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
913///
914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
915#[inline]
916#[target_feature(enable = "avx512bw,avx512vl")]
917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
918#[cfg_attr(test, assert_instr(vpsubb))]
919pub fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
920    unsafe {
921        let sub = _mm256_sub_epi8(a, b).as_i8x32();
922        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
923    }
924}
925
926/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
927///
928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
929#[inline]
930#[target_feature(enable = "avx512bw,avx512vl")]
931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
932#[cfg_attr(test, assert_instr(vpsubb))]
933pub fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
934    unsafe {
935        let sub = _mm256_sub_epi8(a, b).as_i8x32();
936        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
937    }
938}
939
940/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
941///
942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
943#[inline]
944#[target_feature(enable = "avx512bw,avx512vl")]
945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
946#[cfg_attr(test, assert_instr(vpsubb))]
947pub fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
948    unsafe {
949        let sub = _mm_sub_epi8(a, b).as_i8x16();
950        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
951    }
952}
953
954/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
955///
956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
957#[inline]
958#[target_feature(enable = "avx512bw,avx512vl")]
959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
960#[cfg_attr(test, assert_instr(vpsubb))]
961pub fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
962    unsafe {
963        let sub = _mm_sub_epi8(a, b).as_i8x16();
964        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
965    }
966}
967
968/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
969///
970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
971#[inline]
972#[target_feature(enable = "avx512bw")]
973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
974#[cfg_attr(test, assert_instr(vpsubusw))]
975pub fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
976    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
977}
978
979/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
980///
981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
982#[inline]
983#[target_feature(enable = "avx512bw")]
984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
985#[cfg_attr(test, assert_instr(vpsubusw))]
986pub fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
987    unsafe {
988        let sub = _mm512_subs_epu16(a, b).as_u16x32();
989        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
990    }
991}
992
993/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
994///
995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
996#[inline]
997#[target_feature(enable = "avx512bw")]
998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
999#[cfg_attr(test, assert_instr(vpsubusw))]
1000pub fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1001    unsafe {
1002        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1003        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
1004    }
1005}
1006
1007/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1008///
1009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
1010#[inline]
1011#[target_feature(enable = "avx512bw,avx512vl")]
1012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1013#[cfg_attr(test, assert_instr(vpsubusw))]
1014pub fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1015    unsafe {
1016        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1017        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
1018    }
1019}
1020
1021/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1022///
1023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
1024#[inline]
1025#[target_feature(enable = "avx512bw,avx512vl")]
1026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1027#[cfg_attr(test, assert_instr(vpsubusw))]
1028pub fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1029    unsafe {
1030        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1031        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
1032    }
1033}
1034
1035/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1036///
1037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
1038#[inline]
1039#[target_feature(enable = "avx512bw,avx512vl")]
1040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1041#[cfg_attr(test, assert_instr(vpsubusw))]
1042pub fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1043    unsafe {
1044        let sub = _mm_subs_epu16(a, b).as_u16x8();
1045        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
1046    }
1047}
1048
1049/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1050///
1051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
1052#[inline]
1053#[target_feature(enable = "avx512bw,avx512vl")]
1054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1055#[cfg_attr(test, assert_instr(vpsubusw))]
1056pub fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1057    unsafe {
1058        let sub = _mm_subs_epu16(a, b).as_u16x8();
1059        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
1060    }
1061}
1062
1063/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
1064///
1065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
1066#[inline]
1067#[target_feature(enable = "avx512bw")]
1068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1069#[cfg_attr(test, assert_instr(vpsubusb))]
1070pub fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
1071    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
1072}
1073
1074/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1075///
1076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
1077#[inline]
1078#[target_feature(enable = "avx512bw")]
1079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1080#[cfg_attr(test, assert_instr(vpsubusb))]
1081pub fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1082    unsafe {
1083        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1084        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
1085    }
1086}
1087
1088/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1089///
1090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
1091#[inline]
1092#[target_feature(enable = "avx512bw")]
1093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1094#[cfg_attr(test, assert_instr(vpsubusb))]
1095pub fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1096    unsafe {
1097        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1098        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
1099    }
1100}
1101
1102/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1103///
1104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
1105#[inline]
1106#[target_feature(enable = "avx512bw,avx512vl")]
1107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1108#[cfg_attr(test, assert_instr(vpsubusb))]
1109pub fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1110    unsafe {
1111        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1112        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
1113    }
1114}
1115
1116/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1117///
1118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
1119#[inline]
1120#[target_feature(enable = "avx512bw,avx512vl")]
1121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1122#[cfg_attr(test, assert_instr(vpsubusb))]
1123pub fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1124    unsafe {
1125        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1126        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
1127    }
1128}
1129
1130/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1131///
1132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
1133#[inline]
1134#[target_feature(enable = "avx512bw,avx512vl")]
1135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1136#[cfg_attr(test, assert_instr(vpsubusb))]
1137pub fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1138    unsafe {
1139        let sub = _mm_subs_epu8(a, b).as_u8x16();
1140        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
1141    }
1142}
1143
1144/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1145///
1146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
1147#[inline]
1148#[target_feature(enable = "avx512bw,avx512vl")]
1149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1150#[cfg_attr(test, assert_instr(vpsubusb))]
1151pub fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1152    unsafe {
1153        let sub = _mm_subs_epu8(a, b).as_u8x16();
1154        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
1155    }
1156}
1157
1158/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
1159///
1160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
1161#[inline]
1162#[target_feature(enable = "avx512bw")]
1163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1164#[cfg_attr(test, assert_instr(vpsubsw))]
1165pub fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
1166    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
1167}
1168
1169/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1170///
1171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
1172#[inline]
1173#[target_feature(enable = "avx512bw")]
1174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1175#[cfg_attr(test, assert_instr(vpsubsw))]
1176pub fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1177    unsafe {
1178        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1179        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
1180    }
1181}
1182
1183/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1184///
1185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
1186#[inline]
1187#[target_feature(enable = "avx512bw")]
1188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1189#[cfg_attr(test, assert_instr(vpsubsw))]
1190pub fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1191    unsafe {
1192        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1193        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
1194    }
1195}
1196
1197/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1198///
1199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
1200#[inline]
1201#[target_feature(enable = "avx512bw,avx512vl")]
1202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1203#[cfg_attr(test, assert_instr(vpsubsw))]
1204pub fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1205    unsafe {
1206        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1207        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
1208    }
1209}
1210
1211/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1212///
1213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
1214#[inline]
1215#[target_feature(enable = "avx512bw,avx512vl")]
1216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1217#[cfg_attr(test, assert_instr(vpsubsw))]
1218pub fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1219    unsafe {
1220        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1221        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
1222    }
1223}
1224
1225/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1226///
1227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
1228#[inline]
1229#[target_feature(enable = "avx512bw,avx512vl")]
1230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1231#[cfg_attr(test, assert_instr(vpsubsw))]
1232pub fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1233    unsafe {
1234        let sub = _mm_subs_epi16(a, b).as_i16x8();
1235        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
1236    }
1237}
1238
1239/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1240///
1241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
1242#[inline]
1243#[target_feature(enable = "avx512bw,avx512vl")]
1244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1245#[cfg_attr(test, assert_instr(vpsubsw))]
1246pub fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1247    unsafe {
1248        let sub = _mm_subs_epi16(a, b).as_i16x8();
1249        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
1250    }
1251}
1252
1253/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
1254///
1255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
1256#[inline]
1257#[target_feature(enable = "avx512bw")]
1258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1259#[cfg_attr(test, assert_instr(vpsubsb))]
1260pub fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
1261    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
1262}
1263
1264/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1265///
1266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
1267#[inline]
1268#[target_feature(enable = "avx512bw")]
1269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1270#[cfg_attr(test, assert_instr(vpsubsb))]
1271pub fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1272    unsafe {
1273        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1274        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
1275    }
1276}
1277
1278/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1279///
1280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
1281#[inline]
1282#[target_feature(enable = "avx512bw")]
1283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1284#[cfg_attr(test, assert_instr(vpsubsb))]
1285pub fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1286    unsafe {
1287        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1288        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
1289    }
1290}
1291
1292/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1293///
1294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
1295#[inline]
1296#[target_feature(enable = "avx512bw,avx512vl")]
1297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1298#[cfg_attr(test, assert_instr(vpsubsb))]
1299pub fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1300    unsafe {
1301        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1302        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
1303    }
1304}
1305
1306/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1307///
1308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
1309#[inline]
1310#[target_feature(enable = "avx512bw,avx512vl")]
1311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1312#[cfg_attr(test, assert_instr(vpsubsb))]
1313pub fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1314    unsafe {
1315        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1316        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
1317    }
1318}
1319
1320/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1321///
1322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
1323#[inline]
1324#[target_feature(enable = "avx512bw,avx512vl")]
1325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1326#[cfg_attr(test, assert_instr(vpsubsb))]
1327pub fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1328    unsafe {
1329        let sub = _mm_subs_epi8(a, b).as_i8x16();
1330        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
1331    }
1332}
1333
1334/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1335///
1336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
1337#[inline]
1338#[target_feature(enable = "avx512bw,avx512vl")]
1339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1340#[cfg_attr(test, assert_instr(vpsubsb))]
1341pub fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1342    unsafe {
1343        let sub = _mm_subs_epi8(a, b).as_i8x16();
1344        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
1345    }
1346}
1347
1348/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1349///
1350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
1351#[inline]
1352#[target_feature(enable = "avx512bw")]
1353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1354#[cfg_attr(test, assert_instr(vpmulhuw))]
1355pub fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
1356    unsafe {
1357        let a = simd_cast::<_, u32x32>(a.as_u16x32());
1358        let b = simd_cast::<_, u32x32>(b.as_u16x32());
1359        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
1360        transmute(simd_cast::<u32x32, u16x32>(r))
1361    }
1362}
1363
1364/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1365///
1366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
1367#[inline]
1368#[target_feature(enable = "avx512bw")]
1369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1370#[cfg_attr(test, assert_instr(vpmulhuw))]
1371pub fn _mm512_mask_mulhi_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1372    unsafe {
1373        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1374        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
1375    }
1376}
1377
1378/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1379///
1380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
1381#[inline]
1382#[target_feature(enable = "avx512bw")]
1383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1384#[cfg_attr(test, assert_instr(vpmulhuw))]
1385pub fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1386    unsafe {
1387        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1388        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
1389    }
1390}
1391
1392/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1393///
1394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
1395#[inline]
1396#[target_feature(enable = "avx512bw,avx512vl")]
1397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1398#[cfg_attr(test, assert_instr(vpmulhuw))]
1399pub fn _mm256_mask_mulhi_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1400    unsafe {
1401        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1402        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
1403    }
1404}
1405
1406/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1407///
1408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
1409#[inline]
1410#[target_feature(enable = "avx512bw,avx512vl")]
1411#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1412#[cfg_attr(test, assert_instr(vpmulhuw))]
1413pub fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1414    unsafe {
1415        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1416        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
1417    }
1418}
1419
1420/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1421///
1422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
1423#[inline]
1424#[target_feature(enable = "avx512bw,avx512vl")]
1425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1426#[cfg_attr(test, assert_instr(vpmulhuw))]
1427pub fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1428    unsafe {
1429        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1430        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
1431    }
1432}
1433
1434/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1435///
1436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
1437#[inline]
1438#[target_feature(enable = "avx512bw,avx512vl")]
1439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1440#[cfg_attr(test, assert_instr(vpmulhuw))]
1441pub fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1442    unsafe {
1443        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1444        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
1445    }
1446}
1447
1448/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1449///
1450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
1451#[inline]
1452#[target_feature(enable = "avx512bw")]
1453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1454#[cfg_attr(test, assert_instr(vpmulhw))]
1455pub fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
1456    unsafe {
1457        let a = simd_cast::<_, i32x32>(a.as_i16x32());
1458        let b = simd_cast::<_, i32x32>(b.as_i16x32());
1459        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
1460        transmute(simd_cast::<i32x32, i16x32>(r))
1461    }
1462}
1463
1464/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1465///
1466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
1467#[inline]
1468#[target_feature(enable = "avx512bw")]
1469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1470#[cfg_attr(test, assert_instr(vpmulhw))]
1471pub fn _mm512_mask_mulhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1472    unsafe {
1473        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1474        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1475    }
1476}
1477
1478/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1479///
1480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
1481#[inline]
1482#[target_feature(enable = "avx512bw")]
1483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1484#[cfg_attr(test, assert_instr(vpmulhw))]
1485pub fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1486    unsafe {
1487        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1488        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1489    }
1490}
1491
1492/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1493///
1494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
1495#[inline]
1496#[target_feature(enable = "avx512bw,avx512vl")]
1497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1498#[cfg_attr(test, assert_instr(vpmulhw))]
1499pub fn _mm256_mask_mulhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1500    unsafe {
1501        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1502        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1503    }
1504}
1505
1506/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1507///
1508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
1509#[inline]
1510#[target_feature(enable = "avx512bw,avx512vl")]
1511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1512#[cfg_attr(test, assert_instr(vpmulhw))]
1513pub fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1514    unsafe {
1515        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1516        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1517    }
1518}
1519
1520/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1521///
1522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
1523#[inline]
1524#[target_feature(enable = "avx512bw,avx512vl")]
1525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1526#[cfg_attr(test, assert_instr(vpmulhw))]
1527pub fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1528    unsafe {
1529        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1530        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1531    }
1532}
1533
1534/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1535///
1536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
1537#[inline]
1538#[target_feature(enable = "avx512bw,avx512vl")]
1539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1540#[cfg_attr(test, assert_instr(vpmulhw))]
1541pub fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1542    unsafe {
1543        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1544        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1545    }
1546}
1547
1548/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
1549///
1550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
1551#[inline]
1552#[target_feature(enable = "avx512bw")]
1553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1554#[cfg_attr(test, assert_instr(vpmulhrsw))]
1555pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
1556    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
1557}
1558
1559/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1560///
1561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
1562#[inline]
1563#[target_feature(enable = "avx512bw")]
1564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1565#[cfg_attr(test, assert_instr(vpmulhrsw))]
1566pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1567    unsafe {
1568        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1569        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1570    }
1571}
1572
1573/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1574///
1575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
1576#[inline]
1577#[target_feature(enable = "avx512bw")]
1578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1579#[cfg_attr(test, assert_instr(vpmulhrsw))]
1580pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1581    unsafe {
1582        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1583        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1584    }
1585}
1586
1587/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1588///
1589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
1590#[inline]
1591#[target_feature(enable = "avx512bw,avx512vl")]
1592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1593#[cfg_attr(test, assert_instr(vpmulhrsw))]
1594pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1595    unsafe {
1596        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
1597        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1598    }
1599}
1600
1601/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1602///
1603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
1604#[inline]
1605#[target_feature(enable = "avx512bw,avx512vl")]
1606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1607#[cfg_attr(test, assert_instr(vpmulhrsw))]
1608pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1609    unsafe {
1610        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
1611        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1612    }
1613}
1614
1615/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1616///
1617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
1618#[inline]
1619#[target_feature(enable = "avx512bw,avx512vl")]
1620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1621#[cfg_attr(test, assert_instr(vpmulhrsw))]
1622pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1623    unsafe {
1624        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
1625        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1626    }
1627}
1628
1629/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1630///
1631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
1632#[inline]
1633#[target_feature(enable = "avx512bw,avx512vl")]
1634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1635#[cfg_attr(test, assert_instr(vpmulhrsw))]
1636pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1637    unsafe {
1638        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
1639        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1640    }
1641}
1642
1643/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
1644///
1645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
1646#[inline]
1647#[target_feature(enable = "avx512bw")]
1648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1649#[cfg_attr(test, assert_instr(vpmullw))]
1650pub fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
1651    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
1652}
1653
1654/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1655///
1656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
1657#[inline]
1658#[target_feature(enable = "avx512bw")]
1659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1660#[cfg_attr(test, assert_instr(vpmullw))]
1661pub fn _mm512_mask_mullo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1662    unsafe {
1663        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
1664        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1665    }
1666}
1667
1668/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1669///
1670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
1671#[inline]
1672#[target_feature(enable = "avx512bw")]
1673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1674#[cfg_attr(test, assert_instr(vpmullw))]
1675pub fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1676    unsafe {
1677        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
1678        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1679    }
1680}
1681
1682/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1683///
1684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
1685#[inline]
1686#[target_feature(enable = "avx512bw,avx512vl")]
1687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1688#[cfg_attr(test, assert_instr(vpmullw))]
1689pub fn _mm256_mask_mullo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1690    unsafe {
1691        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
1692        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1693    }
1694}
1695
1696/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1697///
1698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
1699#[inline]
1700#[target_feature(enable = "avx512bw,avx512vl")]
1701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1702#[cfg_attr(test, assert_instr(vpmullw))]
1703pub fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1704    unsafe {
1705        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
1706        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1707    }
1708}
1709
1710/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1711///
1712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
1713#[inline]
1714#[target_feature(enable = "avx512bw,avx512vl")]
1715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1716#[cfg_attr(test, assert_instr(vpmullw))]
1717pub fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1718    unsafe {
1719        let mul = _mm_mullo_epi16(a, b).as_i16x8();
1720        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1721    }
1722}
1723
1724/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1725///
1726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
1727#[inline]
1728#[target_feature(enable = "avx512bw,avx512vl")]
1729#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1730#[cfg_attr(test, assert_instr(vpmullw))]
1731pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1732    unsafe {
1733        let mul = _mm_mullo_epi16(a, b).as_i16x8();
1734        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1735    }
1736}
1737
1738/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
1739///
1740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
1741#[inline]
1742#[target_feature(enable = "avx512bw")]
1743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1744#[cfg_attr(test, assert_instr(vpmaxuw))]
1745pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
1746    unsafe {
1747        let a = a.as_u16x32();
1748        let b = b.as_u16x32();
1749        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
1750    }
1751}
1752
1753/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1754///
1755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
1756#[inline]
1757#[target_feature(enable = "avx512bw")]
1758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1759#[cfg_attr(test, assert_instr(vpmaxuw))]
1760pub fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1761    unsafe {
1762        let max = _mm512_max_epu16(a, b).as_u16x32();
1763        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
1764    }
1765}
1766
1767/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1768///
1769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
1770#[inline]
1771#[target_feature(enable = "avx512bw")]
1772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1773#[cfg_attr(test, assert_instr(vpmaxuw))]
1774pub fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1775    unsafe {
1776        let max = _mm512_max_epu16(a, b).as_u16x32();
1777        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
1778    }
1779}
1780
1781/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1782///
1783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
1784#[inline]
1785#[target_feature(enable = "avx512bw,avx512vl")]
1786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1787#[cfg_attr(test, assert_instr(vpmaxuw))]
1788pub fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1789    unsafe {
1790        let max = _mm256_max_epu16(a, b).as_u16x16();
1791        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
1792    }
1793}
1794
1795/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1796///
1797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
1798#[inline]
1799#[target_feature(enable = "avx512bw,avx512vl")]
1800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1801#[cfg_attr(test, assert_instr(vpmaxuw))]
1802pub fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1803    unsafe {
1804        let max = _mm256_max_epu16(a, b).as_u16x16();
1805        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
1806    }
1807}
1808
1809/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1810///
1811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
1812#[inline]
1813#[target_feature(enable = "avx512bw,avx512vl")]
1814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1815#[cfg_attr(test, assert_instr(vpmaxuw))]
1816pub fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1817    unsafe {
1818        let max = _mm_max_epu16(a, b).as_u16x8();
1819        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
1820    }
1821}
1822
1823/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1824///
1825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
1826#[inline]
1827#[target_feature(enable = "avx512bw,avx512vl")]
1828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1829#[cfg_attr(test, assert_instr(vpmaxuw))]
1830pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1831    unsafe {
1832        let max = _mm_max_epu16(a, b).as_u16x8();
1833        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
1834    }
1835}
1836
1837/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
1838///
1839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
1840#[inline]
1841#[target_feature(enable = "avx512bw")]
1842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1843#[cfg_attr(test, assert_instr(vpmaxub))]
1844pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
1845    unsafe {
1846        let a = a.as_u8x64();
1847        let b = b.as_u8x64();
1848        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
1849    }
1850}
1851
1852/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1853///
1854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
1855#[inline]
1856#[target_feature(enable = "avx512bw")]
1857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1858#[cfg_attr(test, assert_instr(vpmaxub))]
1859pub fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1860    unsafe {
1861        let max = _mm512_max_epu8(a, b).as_u8x64();
1862        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
1863    }
1864}
1865
1866/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1867///
1868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
1869#[inline]
1870#[target_feature(enable = "avx512bw")]
1871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1872#[cfg_attr(test, assert_instr(vpmaxub))]
1873pub fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1874    unsafe {
1875        let max = _mm512_max_epu8(a, b).as_u8x64();
1876        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
1877    }
1878}
1879
1880/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1881///
1882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
1883#[inline]
1884#[target_feature(enable = "avx512bw,avx512vl")]
1885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1886#[cfg_attr(test, assert_instr(vpmaxub))]
1887pub fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1888    unsafe {
1889        let max = _mm256_max_epu8(a, b).as_u8x32();
1890        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
1891    }
1892}
1893
1894/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1895///
1896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
1897#[inline]
1898#[target_feature(enable = "avx512bw,avx512vl")]
1899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1900#[cfg_attr(test, assert_instr(vpmaxub))]
1901pub fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1902    unsafe {
1903        let max = _mm256_max_epu8(a, b).as_u8x32();
1904        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
1905    }
1906}
1907
1908/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1909///
1910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
1911#[inline]
1912#[target_feature(enable = "avx512bw,avx512vl")]
1913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1914#[cfg_attr(test, assert_instr(vpmaxub))]
1915pub fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1916    unsafe {
1917        let max = _mm_max_epu8(a, b).as_u8x16();
1918        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
1919    }
1920}
1921
1922/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1923///
1924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
1925#[inline]
1926#[target_feature(enable = "avx512bw,avx512vl")]
1927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1928#[cfg_attr(test, assert_instr(vpmaxub))]
1929pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1930    unsafe {
1931        let max = _mm_max_epu8(a, b).as_u8x16();
1932        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
1933    }
1934}
1935
1936/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
1937///
1938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
1939#[inline]
1940#[target_feature(enable = "avx512bw")]
1941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1942#[cfg_attr(test, assert_instr(vpmaxsw))]
1943pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
1944    unsafe {
1945        let a = a.as_i16x32();
1946        let b = b.as_i16x32();
1947        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
1948    }
1949}
1950
1951/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1952///
1953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
1954#[inline]
1955#[target_feature(enable = "avx512bw")]
1956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1957#[cfg_attr(test, assert_instr(vpmaxsw))]
1958pub fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1959    unsafe {
1960        let max = _mm512_max_epi16(a, b).as_i16x32();
1961        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
1962    }
1963}
1964
1965/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1966///
1967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
1968#[inline]
1969#[target_feature(enable = "avx512bw")]
1970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1971#[cfg_attr(test, assert_instr(vpmaxsw))]
1972pub fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1973    unsafe {
1974        let max = _mm512_max_epi16(a, b).as_i16x32();
1975        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
1976    }
1977}
1978
1979/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1980///
1981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
1982#[inline]
1983#[target_feature(enable = "avx512bw,avx512vl")]
1984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1985#[cfg_attr(test, assert_instr(vpmaxsw))]
1986pub fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1987    unsafe {
1988        let max = _mm256_max_epi16(a, b).as_i16x16();
1989        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
1990    }
1991}
1992
1993/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1994///
1995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
1996#[inline]
1997#[target_feature(enable = "avx512bw,avx512vl")]
1998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1999#[cfg_attr(test, assert_instr(vpmaxsw))]
2000pub fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2001    unsafe {
2002        let max = _mm256_max_epi16(a, b).as_i16x16();
2003        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
2004    }
2005}
2006
2007/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2008///
2009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
2010#[inline]
2011#[target_feature(enable = "avx512bw,avx512vl")]
2012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2013#[cfg_attr(test, assert_instr(vpmaxsw))]
2014pub fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2015    unsafe {
2016        let max = _mm_max_epi16(a, b).as_i16x8();
2017        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
2018    }
2019}
2020
2021/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2022///
2023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
2024#[inline]
2025#[target_feature(enable = "avx512bw,avx512vl")]
2026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2027#[cfg_attr(test, assert_instr(vpmaxsw))]
2028pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2029    unsafe {
2030        let max = _mm_max_epi16(a, b).as_i16x8();
2031        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
2032    }
2033}
2034
2035/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
2036///
2037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
2038#[inline]
2039#[target_feature(enable = "avx512bw")]
2040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2041#[cfg_attr(test, assert_instr(vpmaxsb))]
2042pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
2043    unsafe {
2044        let a = a.as_i8x64();
2045        let b = b.as_i8x64();
2046        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
2047    }
2048}
2049
2050/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2051///
2052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
2053#[inline]
2054#[target_feature(enable = "avx512bw")]
2055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2056#[cfg_attr(test, assert_instr(vpmaxsb))]
2057pub fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2058    unsafe {
2059        let max = _mm512_max_epi8(a, b).as_i8x64();
2060        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
2061    }
2062}
2063
2064/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2065///
2066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
2067#[inline]
2068#[target_feature(enable = "avx512bw")]
2069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2070#[cfg_attr(test, assert_instr(vpmaxsb))]
2071pub fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2072    unsafe {
2073        let max = _mm512_max_epi8(a, b).as_i8x64();
2074        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
2075    }
2076}
2077
2078/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2079///
2080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
2081#[inline]
2082#[target_feature(enable = "avx512bw,avx512vl")]
2083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2084#[cfg_attr(test, assert_instr(vpmaxsb))]
2085pub fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2086    unsafe {
2087        let max = _mm256_max_epi8(a, b).as_i8x32();
2088        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
2089    }
2090}
2091
2092/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2093///
2094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
2095#[inline]
2096#[target_feature(enable = "avx512bw,avx512vl")]
2097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2098#[cfg_attr(test, assert_instr(vpmaxsb))]
2099pub fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2100    unsafe {
2101        let max = _mm256_max_epi8(a, b).as_i8x32();
2102        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
2103    }
2104}
2105
2106/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2107///
2108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
2109#[inline]
2110#[target_feature(enable = "avx512bw,avx512vl")]
2111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2112#[cfg_attr(test, assert_instr(vpmaxsb))]
2113pub fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2114    unsafe {
2115        let max = _mm_max_epi8(a, b).as_i8x16();
2116        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
2117    }
2118}
2119
2120/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2121///
2122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
2123#[inline]
2124#[target_feature(enable = "avx512bw,avx512vl")]
2125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2126#[cfg_attr(test, assert_instr(vpmaxsb))]
2127pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2128    unsafe {
2129        let max = _mm_max_epi8(a, b).as_i8x16();
2130        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
2131    }
2132}
2133
2134/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
2135///
2136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
2137#[inline]
2138#[target_feature(enable = "avx512bw")]
2139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2140#[cfg_attr(test, assert_instr(vpminuw))]
2141pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
2142    unsafe {
2143        let a = a.as_u16x32();
2144        let b = b.as_u16x32();
2145        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
2146    }
2147}
2148
2149/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2150///
2151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
2152#[inline]
2153#[target_feature(enable = "avx512bw")]
2154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2155#[cfg_attr(test, assert_instr(vpminuw))]
2156pub fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2157    unsafe {
2158        let min = _mm512_min_epu16(a, b).as_u16x32();
2159        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
2160    }
2161}
2162
2163/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2164///
2165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
2166#[inline]
2167#[target_feature(enable = "avx512bw")]
2168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2169#[cfg_attr(test, assert_instr(vpminuw))]
2170pub fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2171    unsafe {
2172        let min = _mm512_min_epu16(a, b).as_u16x32();
2173        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
2174    }
2175}
2176
2177/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2178///
2179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
2180#[inline]
2181#[target_feature(enable = "avx512bw,avx512vl")]
2182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2183#[cfg_attr(test, assert_instr(vpminuw))]
2184pub fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2185    unsafe {
2186        let min = _mm256_min_epu16(a, b).as_u16x16();
2187        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
2188    }
2189}
2190
2191/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2192///
2193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
2194#[inline]
2195#[target_feature(enable = "avx512bw,avx512vl")]
2196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2197#[cfg_attr(test, assert_instr(vpminuw))]
2198pub fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2199    unsafe {
2200        let min = _mm256_min_epu16(a, b).as_u16x16();
2201        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
2202    }
2203}
2204
2205/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2206///
2207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
2208#[inline]
2209#[target_feature(enable = "avx512bw,avx512vl")]
2210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2211#[cfg_attr(test, assert_instr(vpminuw))]
2212pub fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2213    unsafe {
2214        let min = _mm_min_epu16(a, b).as_u16x8();
2215        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
2216    }
2217}
2218
2219/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2220///
2221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
2222#[inline]
2223#[target_feature(enable = "avx512bw,avx512vl")]
2224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2225#[cfg_attr(test, assert_instr(vpminuw))]
2226pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2227    unsafe {
2228        let min = _mm_min_epu16(a, b).as_u16x8();
2229        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
2230    }
2231}
2232
2233/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
2234///
2235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
2236#[inline]
2237#[target_feature(enable = "avx512bw")]
2238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2239#[cfg_attr(test, assert_instr(vpminub))]
2240pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
2241    unsafe {
2242        let a = a.as_u8x64();
2243        let b = b.as_u8x64();
2244        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
2245    }
2246}
2247
2248/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2249///
2250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
2251#[inline]
2252#[target_feature(enable = "avx512bw")]
2253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2254#[cfg_attr(test, assert_instr(vpminub))]
2255pub fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2256    unsafe {
2257        let min = _mm512_min_epu8(a, b).as_u8x64();
2258        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
2259    }
2260}
2261
2262/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2263///
2264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
2265#[inline]
2266#[target_feature(enable = "avx512bw")]
2267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2268#[cfg_attr(test, assert_instr(vpminub))]
2269pub fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2270    unsafe {
2271        let min = _mm512_min_epu8(a, b).as_u8x64();
2272        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
2273    }
2274}
2275
2276/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2277///
2278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
2279#[inline]
2280#[target_feature(enable = "avx512bw,avx512vl")]
2281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2282#[cfg_attr(test, assert_instr(vpminub))]
2283pub fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2284    unsafe {
2285        let min = _mm256_min_epu8(a, b).as_u8x32();
2286        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
2287    }
2288}
2289
2290/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2291///
2292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
2293#[inline]
2294#[target_feature(enable = "avx512bw,avx512vl")]
2295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2296#[cfg_attr(test, assert_instr(vpminub))]
2297pub fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2298    unsafe {
2299        let min = _mm256_min_epu8(a, b).as_u8x32();
2300        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
2301    }
2302}
2303
2304/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2305///
2306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
2307#[inline]
2308#[target_feature(enable = "avx512bw,avx512vl")]
2309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2310#[cfg_attr(test, assert_instr(vpminub))]
2311pub fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2312    unsafe {
2313        let min = _mm_min_epu8(a, b).as_u8x16();
2314        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
2315    }
2316}
2317
2318/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2319///
2320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
2321#[inline]
2322#[target_feature(enable = "avx512bw,avx512vl")]
2323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2324#[cfg_attr(test, assert_instr(vpminub))]
2325pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2326    unsafe {
2327        let min = _mm_min_epu8(a, b).as_u8x16();
2328        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
2329    }
2330}
2331
2332/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
2333///
2334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
2335#[inline]
2336#[target_feature(enable = "avx512bw")]
2337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2338#[cfg_attr(test, assert_instr(vpminsw))]
2339pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
2340    unsafe {
2341        let a = a.as_i16x32();
2342        let b = b.as_i16x32();
2343        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
2344    }
2345}
2346
2347/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2348///
2349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
2350#[inline]
2351#[target_feature(enable = "avx512bw")]
2352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2353#[cfg_attr(test, assert_instr(vpminsw))]
2354pub fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2355    unsafe {
2356        let min = _mm512_min_epi16(a, b).as_i16x32();
2357        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
2358    }
2359}
2360
2361/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2362///
2363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
2364#[inline]
2365#[target_feature(enable = "avx512bw")]
2366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2367#[cfg_attr(test, assert_instr(vpminsw))]
2368pub fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2369    unsafe {
2370        let min = _mm512_min_epi16(a, b).as_i16x32();
2371        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
2372    }
2373}
2374
2375/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2376///
2377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
2378#[inline]
2379#[target_feature(enable = "avx512bw,avx512vl")]
2380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2381#[cfg_attr(test, assert_instr(vpminsw))]
2382pub fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2383    unsafe {
2384        let min = _mm256_min_epi16(a, b).as_i16x16();
2385        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
2386    }
2387}
2388
2389/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2390///
2391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
2392#[inline]
2393#[target_feature(enable = "avx512bw,avx512vl")]
2394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2395#[cfg_attr(test, assert_instr(vpminsw))]
2396pub fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2397    unsafe {
2398        let min = _mm256_min_epi16(a, b).as_i16x16();
2399        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
2400    }
2401}
2402
2403/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2404///
2405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
2406#[inline]
2407#[target_feature(enable = "avx512bw,avx512vl")]
2408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2409#[cfg_attr(test, assert_instr(vpminsw))]
2410pub fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2411    unsafe {
2412        let min = _mm_min_epi16(a, b).as_i16x8();
2413        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
2414    }
2415}
2416
2417/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2418///
2419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
2420#[inline]
2421#[target_feature(enable = "avx512bw,avx512vl")]
2422#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2423#[cfg_attr(test, assert_instr(vpminsw))]
2424pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2425    unsafe {
2426        let min = _mm_min_epi16(a, b).as_i16x8();
2427        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
2428    }
2429}
2430
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Lane-wise "select `a` where `a < b`, else `b`"; the backend is expected
        // to pattern-match this select-of-compare into a single `vpminsb`.
        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
    }
}
2445
2446/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2447///
2448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
2449#[inline]
2450#[target_feature(enable = "avx512bw")]
2451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2452#[cfg_attr(test, assert_instr(vpminsb))]
2453pub fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2454    unsafe {
2455        let min = _mm512_min_epi8(a, b).as_i8x64();
2456        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
2457    }
2458}
2459
2460/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2461///
2462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
2463#[inline]
2464#[target_feature(enable = "avx512bw")]
2465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2466#[cfg_attr(test, assert_instr(vpminsb))]
2467pub fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2468    unsafe {
2469        let min = _mm512_min_epi8(a, b).as_i8x64();
2470        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
2471    }
2472}
2473
2474/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2475///
2476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
2477#[inline]
2478#[target_feature(enable = "avx512bw,avx512vl")]
2479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2480#[cfg_attr(test, assert_instr(vpminsb))]
2481pub fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2482    unsafe {
2483        let min = _mm256_min_epi8(a, b).as_i8x32();
2484        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
2485    }
2486}
2487
2488/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2489///
2490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
2491#[inline]
2492#[target_feature(enable = "avx512bw,avx512vl")]
2493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2494#[cfg_attr(test, assert_instr(vpminsb))]
2495pub fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2496    unsafe {
2497        let min = _mm256_min_epi8(a, b).as_i8x32();
2498        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
2499    }
2500}
2501
2502/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2503///
2504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
2505#[inline]
2506#[target_feature(enable = "avx512bw,avx512vl")]
2507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2508#[cfg_attr(test, assert_instr(vpminsb))]
2509pub fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2510    unsafe {
2511        let min = _mm_min_epi8(a, b).as_i8x16();
2512        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
2513    }
2514}
2515
2516/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2517///
2518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
2519#[inline]
2520#[target_feature(enable = "avx512bw,avx512vl")]
2521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2522#[cfg_attr(test, assert_instr(vpminsb))]
2523pub fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2524    unsafe {
2525        let min = _mm_min_epi8(a, b).as_i8x16();
2526        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
2527    }
2528}
2529
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Per-lane unsigned `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
}
2540
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2551
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Per-lane unsigned `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
}
2562
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2573
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Per-lane unsigned `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
}
2584
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2595
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Per-lane unsigned `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
}
2606
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2617
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Per-lane unsigned `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
}
2628
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2639
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Per-lane unsigned `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
}
2650
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2661
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Per-lane signed `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
}
2672
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2683
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Per-lane signed `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
}
2694
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2705
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Per-lane signed `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}
2716
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2727
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Per-lane signed `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
}
2738
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2749
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Per-lane signed `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
}
2760
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2771
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Per-lane signed `<` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}
2782
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegates to the predicate-based compare with the LT predicate; result
    // bits are zeroed where the corresponding bit of `k1` is clear.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2793
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Per-lane unsigned `>` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
}
2804
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // NLE (not-less-or-equal) is the predicate encoding for greater-than;
    // result bits are zeroed where the corresponding bit of `k1` is clear.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2815
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Per-lane unsigned `>` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
}
2826
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // NLE (not-less-or-equal) is the predicate encoding for greater-than;
    // result bits are zeroed where the corresponding bit of `k1` is clear.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2837
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Per-lane unsigned `>` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
}
2848
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // NLE (not-less-or-equal) is the predicate encoding for greater-than;
    // result bits are zeroed where the corresponding bit of `k1` is clear.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2859
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Per-lane unsigned `>` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
}
2870
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // NLE (not-less-or-equal) is the predicate encoding for greater-than;
    // result bits are zeroed where the corresponding bit of `k1` is clear.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2881
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Per-lane unsigned `>` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
}
2892
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // NLE (not-less-or-equal) is the predicate encoding for greater-than;
    // result bits are zeroed where the corresponding bit of `k1` is clear.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2903
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Per-lane unsigned `>` yields an all-ones/all-zeros lane mask, which
    // `simd_bitmask` then packs into one bit per lane.
    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
}
2914
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // NLE (not-less-or-equal) is the predicate encoding for greater-than;
    // result bits are zeroed where the corresponding bit of `k1` is clear.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2925
2926/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
2927///
2928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
2929#[inline]
2930#[target_feature(enable = "avx512bw")]
2931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2932#[cfg_attr(test, assert_instr(vpcmp))]
2933pub fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2934    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
2935}
2936
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Greater-than is the NLE (not less-or-equal) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2947
2948/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
2949///
2950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
2951#[inline]
2952#[target_feature(enable = "avx512bw,avx512vl")]
2953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2954#[cfg_attr(test, assert_instr(vpcmp))]
2955pub fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2956    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
2957}
2958
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Greater-than is the NLE (not less-or-equal) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2969
2970/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
2971///
2972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
2973#[inline]
2974#[target_feature(enable = "avx512bw,avx512vl")]
2975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2976#[cfg_attr(test, assert_instr(vpcmp))]
2977pub fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2978    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
2979}
2980
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-than is the NLE (not less-or-equal) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
2991
2992/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
2993///
2994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
2995#[inline]
2996#[target_feature(enable = "avx512bw")]
2997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2998#[cfg_attr(test, assert_instr(vpcmp))]
2999pub fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3000    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
3001}
3002
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Greater-than is the NLE (not less-or-equal) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3013
3014/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3015///
3016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
3017#[inline]
3018#[target_feature(enable = "avx512bw,avx512vl")]
3019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3020#[cfg_attr(test, assert_instr(vpcmp))]
3021pub fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3022    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
3023}
3024
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Greater-than is the NLE (not less-or-equal) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3035
3036/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3037///
3038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
3039#[inline]
3040#[target_feature(enable = "avx512bw,avx512vl")]
3041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3042#[cfg_attr(test, assert_instr(vpcmp))]
3043pub fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3044    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
3045}
3046
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Greater-than is the NLE (not less-or-equal) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3057
3058/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3059///
3060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
3061#[inline]
3062#[target_feature(enable = "avx512bw")]
3063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3064#[cfg_attr(test, assert_instr(vpcmp))]
3065pub fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3066    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
3067}
3068
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3079
3080/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3081///
3082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
3083#[inline]
3084#[target_feature(enable = "avx512bw,avx512vl")]
3085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3086#[cfg_attr(test, assert_instr(vpcmp))]
3087pub fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3088    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
3089}
3090
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3101
3102/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3103///
3104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
3105#[inline]
3106#[target_feature(enable = "avx512bw,avx512vl")]
3107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3108#[cfg_attr(test, assert_instr(vpcmp))]
3109pub fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3110    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
3111}
3112
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3123
3124/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3125///
3126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
3127#[inline]
3128#[target_feature(enable = "avx512bw")]
3129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3130#[cfg_attr(test, assert_instr(vpcmp))]
3131pub fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3132    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
3133}
3134
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3145
3146/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3147///
3148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
3149#[inline]
3150#[target_feature(enable = "avx512bw,avx512vl")]
3151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3152#[cfg_attr(test, assert_instr(vpcmp))]
3153pub fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3154    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
3155}
3156
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3167
3168/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3169///
3170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
3171#[inline]
3172#[target_feature(enable = "avx512bw,avx512vl")]
3173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3174#[cfg_attr(test, assert_instr(vpcmp))]
3175pub fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3176    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
3177}
3178
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3189
3190/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3191///
3192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
3193#[inline]
3194#[target_feature(enable = "avx512bw")]
3195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3196#[cfg_attr(test, assert_instr(vpcmp))]
3197pub fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3198    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
3199}
3200
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3211
3212/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3213///
3214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
3215#[inline]
3216#[target_feature(enable = "avx512bw,avx512vl")]
3217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3218#[cfg_attr(test, assert_instr(vpcmp))]
3219pub fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3220    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
3221}
3222
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3233
3234/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3235///
3236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
3237#[inline]
3238#[target_feature(enable = "avx512bw,avx512vl")]
3239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3240#[cfg_attr(test, assert_instr(vpcmp))]
3241pub fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3242    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
3243}
3244
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3255
3256/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3257///
3258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
3259#[inline]
3260#[target_feature(enable = "avx512bw")]
3261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3262#[cfg_attr(test, assert_instr(vpcmp))]
3263pub fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3264    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
3265}
3266
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3277
3278/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3279///
3280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
3281#[inline]
3282#[target_feature(enable = "avx512bw,avx512vl")]
3283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3284#[cfg_attr(test, assert_instr(vpcmp))]
3285pub fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3286    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
3287}
3288
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3299
3300/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3301///
3302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
3303#[inline]
3304#[target_feature(enable = "avx512bw,avx512vl")]
3305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3306#[cfg_attr(test, assert_instr(vpcmp))]
3307pub fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3308    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
3309}
3310
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Less-or-equal maps directly to the LE predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3321
3322/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3323///
3324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
3325#[inline]
3326#[target_feature(enable = "avx512bw")]
3327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3328#[cfg_attr(test, assert_instr(vpcmp))]
3329pub fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3330    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
3331}
3332
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Greater-or-equal is the NLT (not less-than) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3343
3344/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3345///
3346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
3347#[inline]
3348#[target_feature(enable = "avx512bw,avx512vl")]
3349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3350#[cfg_attr(test, assert_instr(vpcmp))]
3351pub fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3352    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
3353}
3354
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Greater-or-equal is the NLT (not less-than) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3365
3366/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3367///
3368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
3369#[inline]
3370#[target_feature(enable = "avx512bw,avx512vl")]
3371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3372#[cfg_attr(test, assert_instr(vpcmp))]
3373pub fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3374    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
3375}
3376
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-or-equal is the NLT (not less-than) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3387
3388/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3389///
3390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
3391#[inline]
3392#[target_feature(enable = "avx512bw")]
3393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3394#[cfg_attr(test, assert_instr(vpcmp))]
3395pub fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3396    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
3397}
3398
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Greater-or-equal is the NLT (not less-than) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3409
3410/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3411///
3412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
3413#[inline]
3414#[target_feature(enable = "avx512bw,avx512vl")]
3415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3416#[cfg_attr(test, assert_instr(vpcmp))]
3417pub fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3418    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
3419}
3420
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Greater-or-equal is the NLT (not less-than) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3431
3432/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3433///
3434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
3435#[inline]
3436#[target_feature(enable = "avx512bw,avx512vl")]
3437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3438#[cfg_attr(test, assert_instr(vpcmp))]
3439pub fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3440    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
3441}
3442
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Greater-or-equal is the NLT (not less-than) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3453
3454/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3455///
3456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
3457#[inline]
3458#[target_feature(enable = "avx512bw")]
3459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3460#[cfg_attr(test, assert_instr(vpcmp))]
3461pub fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3462    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
3463}
3464
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Greater-or-equal is the NLT (not less-than) predicate of the generic
    // masked compare; result bits where k1 is 0 are cleared.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3475
3476/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3477///
3478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
3479#[inline]
3480#[target_feature(enable = "avx512bw,avx512vl")]
3481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3482#[cfg_attr(test, assert_instr(vpcmp))]
3483pub fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3484    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
3485}
3486
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) is the greater-than-or-equal predicate;
    // the zeromask `k1` is applied by the generic compare.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3497
3498/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3499///
3500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
3501#[inline]
3502#[target_feature(enable = "avx512bw,avx512vl")]
3503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3504#[cfg_attr(test, assert_instr(vpcmp))]
3505pub fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3506    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
3507}
3508
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_NLT` (not-less-than) is the greater-than-or-equal predicate;
    // the zeromask `k1` is applied by the generic compare.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3519
3520/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3521///
3522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
3523#[inline]
3524#[target_feature(enable = "avx512bw")]
3525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3526#[cfg_attr(test, assert_instr(vpcmp))]
3527pub fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3528    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
3529}
3530
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_NLT` (not-less-than) is the greater-than-or-equal predicate;
    // the zeromask `k1` is applied by the generic compare.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3541
3542/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3543///
3544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
3545#[inline]
3546#[target_feature(enable = "avx512bw,avx512vl")]
3547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3548#[cfg_attr(test, assert_instr(vpcmp))]
3549pub fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3550    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
3551}
3552
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_NLT` (not-less-than) is the greater-than-or-equal predicate;
    // the zeromask `k1` is applied by the generic compare.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3563
3564/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3565///
3566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
3567#[inline]
3568#[target_feature(enable = "avx512bw,avx512vl")]
3569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3570#[cfg_attr(test, assert_instr(vpcmp))]
3571pub fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3572    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
3573}
3574
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) is the greater-than-or-equal predicate;
    // the zeromask `k1` is applied by the generic compare.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3585
3586/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3587///
3588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
3589#[inline]
3590#[target_feature(enable = "avx512bw")]
3591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3592#[cfg_attr(test, assert_instr(vpcmp))]
3593pub fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3594    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
3595}
3596
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3607
3608/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3609///
3610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
3611#[inline]
3612#[target_feature(enable = "avx512bw,avx512vl")]
3613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3614#[cfg_attr(test, assert_instr(vpcmp))]
3615pub fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3616    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
3617}
3618
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3629
3630/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3631///
3632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
3633#[inline]
3634#[target_feature(enable = "avx512bw,avx512vl")]
3635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3636#[cfg_attr(test, assert_instr(vpcmp))]
3637pub fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3638    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
3639}
3640
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3651
3652/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3653///
3654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
3655#[inline]
3656#[target_feature(enable = "avx512bw")]
3657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3658#[cfg_attr(test, assert_instr(vpcmp))]
3659pub fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3660    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
3661}
3662
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3673
3674/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3675///
3676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
3677#[inline]
3678#[target_feature(enable = "avx512bw,avx512vl")]
3679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3680#[cfg_attr(test, assert_instr(vpcmp))]
3681pub fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3682    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
3683}
3684
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3695
3696/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3697///
3698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
3699#[inline]
3700#[target_feature(enable = "avx512bw,avx512vl")]
3701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3702#[cfg_attr(test, assert_instr(vpcmp))]
3703pub fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3704    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
3705}
3706
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3717
3718/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
3719///
3720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
3721#[inline]
3722#[target_feature(enable = "avx512bw")]
3723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3724#[cfg_attr(test, assert_instr(vpcmp))]
3725pub fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3726    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
3727}
3728
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3739
3740/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
3741///
3742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
3743#[inline]
3744#[target_feature(enable = "avx512bw,avx512vl")]
3745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3746#[cfg_attr(test, assert_instr(vpcmp))]
3747pub fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3748    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
3749}
3750
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3761
3762/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
3763///
3764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
3765#[inline]
3766#[target_feature(enable = "avx512bw,avx512vl")]
3767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3768#[cfg_attr(test, assert_instr(vpcmp))]
3769pub fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3770    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
3771}
3772
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3783
3784/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
3785///
3786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
3787#[inline]
3788#[target_feature(enable = "avx512bw")]
3789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3790#[cfg_attr(test, assert_instr(vpcmp))]
3791pub fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3792    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
3793}
3794
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3805
3806/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
3807///
3808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
3809#[inline]
3810#[target_feature(enable = "avx512bw,avx512vl")]
3811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3812#[cfg_attr(test, assert_instr(vpcmp))]
3813pub fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3814    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
3815}
3816
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3827
3828/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
3829///
3830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
3831#[inline]
3832#[target_feature(enable = "avx512bw,avx512vl")]
3833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3834#[cfg_attr(test, assert_instr(vpcmp))]
3835pub fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3836    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
3837}
3838
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Equality predicate; the zeromask `k1` is applied by the generic compare.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3849
3850/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3851///
3852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
3853#[inline]
3854#[target_feature(enable = "avx512bw")]
3855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3856#[cfg_attr(test, assert_instr(vpcmp))]
3857pub fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3858    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
3859}
3860
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Not-equal predicate; the zeromask `k1` is applied by the generic compare.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3871
3872/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3873///
3874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
3875#[inline]
3876#[target_feature(enable = "avx512bw,avx512vl")]
3877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3878#[cfg_attr(test, assert_instr(vpcmp))]
3879pub fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3880    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
3881}
3882
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Not-equal predicate; the zeromask `k1` is applied by the generic compare.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3893
3894/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3895///
3896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
3897#[inline]
3898#[target_feature(enable = "avx512bw,avx512vl")]
3899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3900#[cfg_attr(test, assert_instr(vpcmp))]
3901pub fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3902    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
3903}
3904
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Not-equal predicate; the zeromask `k1` is applied by the generic compare.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3915
3916/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3917///
3918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
3919#[inline]
3920#[target_feature(enable = "avx512bw")]
3921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3922#[cfg_attr(test, assert_instr(vpcmp))]
3923pub fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3924    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
3925}
3926
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Not-equal predicate; the zeromask `k1` is applied by the generic compare.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3937
3938/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3939///
3940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
3941#[inline]
3942#[target_feature(enable = "avx512bw,avx512vl")]
3943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3944#[cfg_attr(test, assert_instr(vpcmp))]
3945pub fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3946    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
3947}
3948
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Not-equal predicate; the zeromask `k1` is applied by the generic compare.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3959
3960/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3961///
3962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
3963#[inline]
3964#[target_feature(enable = "avx512bw,avx512vl")]
3965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3966#[cfg_attr(test, assert_instr(vpcmp))]
3967pub fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3968    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
3969}
3970
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Not-equal predicate; the zeromask `k1` is applied by the generic compare.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
3981
3982/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3983///
3984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
3985#[inline]
3986#[target_feature(enable = "avx512bw")]
3987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3988#[cfg_attr(test, assert_instr(vpcmp))]
3989pub fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3990    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
3991}
3992
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Not-equal predicate; the zeromask `k1` is applied by the generic compare.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4003
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise `!=` over the 16 i16 lanes, then pack one bit per lane.
    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
}
4014
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the general masked compare with the not-equal predicate.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4025
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=1078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `!=` over the 8 i16 lanes, then pack one bit per lane.
    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
}
4036
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the general masked compare with the not-equal predicate.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4047
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise `!=` over the 64 i8 lanes, then pack one bit per lane
    // into the 64-bit mask.
    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
}
4058
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the general masked compare with the not-equal predicate.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4069
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise `!=` over the 32 i8 lanes, then pack one bit per lane.
    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
}
4080
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the general masked compare with the not-equal predicate.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4091
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise `!=` over the 16 i8 lanes, then pack one bit per lane.
    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
}
4102
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the general masked compare with the not-equal predicate.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4113
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        // Reinterpret the lanes as unsigned so the ordered comparisons below
        // use unsigned semantics.
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones).
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4140
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4172
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        // Reinterpret the lanes as unsigned for unsigned ordered comparisons.
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones).
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4199
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4231
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        // Reinterpret the lanes as unsigned for unsigned ordered comparisons.
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones).
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4258
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4286
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        // Reinterpret the lanes as unsigned for unsigned ordered comparisons.
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones).
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4313
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4345
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        // Reinterpret the lanes as unsigned for unsigned ordered comparisons.
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones).
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4372
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4404
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        // Reinterpret the lanes as unsigned for unsigned ordered comparisons.
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones).
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4431
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4459
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones); signed ordering.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4486
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4518
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones); signed ordering.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4545
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4577
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones); signed ordering.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4604
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4632
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones); signed ordering.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4659
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Expand the `k1` bitmask into a per-lane all-ones/all-zeros vector so
        // it can be AND-ed with each lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (zero regardless of
        // k1), 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (exactly k1).
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4691
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid predicate (_MM_CMPINT_*).
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        // Predicate encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE (always zero),
        // 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (all ones); signed ordering.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1),
        };
        // Pack each lane's all-ones/all-zeros result into one bit of the mask.
        simd_bitmask(r)
    }
}
4718
4719/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4720///
4721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
4722#[inline]
4723#[target_feature(enable = "avx512bw,avx512vl")]
4724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4725#[rustc_legacy_const_generics(3)]
4726#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    // IMM8 follows the _MM_CMPINT_* encoding:
    // 0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT (>=), 6 = NLE (>), 7 = TRUE.
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        // Expand the bitmask k1 to a vector of all-ones/all-zero lanes so the
        // zeromask can be applied with a lanewise AND against the compare result.
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO, // always-false predicate: no AND with k1 needed
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1, // IMM8 == 7: always-true predicate, result is just the mask
        };
        simd_bitmask(r)
    }
}
4750
4751/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4752///
4753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
4754#[inline]
4755#[target_feature(enable = "avx512bw,avx512vl")]
4756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4757#[rustc_legacy_const_generics(2)]
4758#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    // IMM8 follows the _MM_CMPINT_* encoding:
    // 0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT (>=), 6 = NLE (>), 7 = TRUE.
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO, // always-false predicate
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1), // IMM8 == 7: always-true predicate
        };
        simd_bitmask(r)
    }
}
4777
4778/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
4781#[inline]
4782#[target_feature(enable = "avx512bw,avx512vl")]
4783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4784#[rustc_legacy_const_generics(3)]
4785#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // IMM8 follows the _MM_CMPINT_* encoding:
    // 0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT (>=), 6 = NLE (>), 7 = TRUE.
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        // Expand the bitmask k1 to a vector of all-ones/all-zero lanes so the
        // zeromask can be applied with a lanewise AND against the compare result.
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO, // always-false predicate: no AND with k1 needed
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1, // IMM8 == 7: always-true predicate, result is just the mask
        };
        simd_bitmask(r)
    }
}
4805
4806/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
4807///
4808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
4809#[inline]
4810#[target_feature(enable = "avx512bw,avx512vl")]
4811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4812pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
4813    unsafe { simd_reduce_add_unordered(a.as_i16x16()) }
4814}
4815
4816/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4817///
4818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
4819#[inline]
4820#[target_feature(enable = "avx512bw,avx512vl")]
4821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4822pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
4823    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
4824}
4825
4826/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
4827///
4828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
4829#[inline]
4830#[target_feature(enable = "avx512bw,avx512vl")]
4831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4832pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
4833    unsafe { simd_reduce_add_unordered(a.as_i16x8()) }
4834}
4835
4836/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4837///
4838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
4839#[inline]
4840#[target_feature(enable = "avx512bw,avx512vl")]
4841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4842pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
4843    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
4844}
4845
4846/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
4847///
4848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
4849#[inline]
4850#[target_feature(enable = "avx512bw,avx512vl")]
4851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4852pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
4853    unsafe { simd_reduce_add_unordered(a.as_i8x32()) }
4854}
4855
4856/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4857///
4858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
4859#[inline]
4860#[target_feature(enable = "avx512bw,avx512vl")]
4861#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4862pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
4863    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
4864}
4865
4866/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
4869#[inline]
4870#[target_feature(enable = "avx512bw,avx512vl")]
4871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4872pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
4873    unsafe { simd_reduce_add_unordered(a.as_i8x16()) }
4874}
4875
4876/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4877///
4878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
4879#[inline]
4880#[target_feature(enable = "avx512bw,avx512vl")]
4881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4882pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
4883    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
4884}
4885
4886/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4887///
4888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
4889#[inline]
4890#[target_feature(enable = "avx512bw,avx512vl")]
4891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4892pub fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
4893    unsafe { simd_reduce_and(a.as_i16x16()) }
4894}
4895
4896/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4897///
4898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
4899#[inline]
4900#[target_feature(enable = "avx512bw,avx512vl")]
4901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4902pub fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
4903    unsafe {
4904        simd_reduce_and(simd_select_bitmask(
4905            k,
4906            a.as_i16x16(),
4907            _mm256_set1_epi64x(-1).as_i16x16(),
4908        ))
4909    }
4910}
4911
4912/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4913///
4914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
4915#[inline]
4916#[target_feature(enable = "avx512bw,avx512vl")]
4917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4918pub fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
4919    unsafe { simd_reduce_and(a.as_i16x8()) }
4920}
4921
4922/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4923///
4924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
4925#[inline]
4926#[target_feature(enable = "avx512bw,avx512vl")]
4927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4928pub fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
4929    unsafe {
4930        simd_reduce_and(simd_select_bitmask(
4931            k,
4932            a.as_i16x8(),
4933            _mm_set1_epi64x(-1).as_i16x8(),
4934        ))
4935    }
4936}
4937
4938/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4939///
4940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
4941#[inline]
4942#[target_feature(enable = "avx512bw,avx512vl")]
4943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4944pub fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
4945    unsafe { simd_reduce_and(a.as_i8x32()) }
4946}
4947
4948/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4949///
4950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
4951#[inline]
4952#[target_feature(enable = "avx512bw,avx512vl")]
4953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4954pub fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
4955    unsafe {
4956        simd_reduce_and(simd_select_bitmask(
4957            k,
4958            a.as_i8x32(),
4959            _mm256_set1_epi64x(-1).as_i8x32(),
4960        ))
4961    }
4962}
4963
4964/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4965///
4966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
4967#[inline]
4968#[target_feature(enable = "avx512bw,avx512vl")]
4969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4970pub fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
4971    unsafe { simd_reduce_and(a.as_i8x16()) }
4972}
4973
4974/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4975///
4976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
4977#[inline]
4978#[target_feature(enable = "avx512bw,avx512vl")]
4979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4980pub fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
4981    unsafe {
4982        simd_reduce_and(simd_select_bitmask(
4983            k,
4984            a.as_i8x16(),
4985            _mm_set1_epi64x(-1).as_i8x16(),
4986        ))
4987    }
4988}
4989
4990/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
4991///
4992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
4993#[inline]
4994#[target_feature(enable = "avx512bw,avx512vl")]
4995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4996pub fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
4997    unsafe { simd_reduce_max(a.as_i16x16()) }
4998}
4999
5000/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5001///
5002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
5003#[inline]
5004#[target_feature(enable = "avx512bw,avx512vl")]
5005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5006pub fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
5007    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
5008}
5009
5010/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5011///
5012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
5013#[inline]
5014#[target_feature(enable = "avx512bw,avx512vl")]
5015#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5016pub fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
5017    unsafe { simd_reduce_max(a.as_i16x8()) }
5018}
5019
5020/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
5023#[inline]
5024#[target_feature(enable = "avx512bw,avx512vl")]
5025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5026pub fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
5027    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
5028}
5029
5030/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5031///
5032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
5033#[inline]
5034#[target_feature(enable = "avx512bw,avx512vl")]
5035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5036pub fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
5037    unsafe { simd_reduce_max(a.as_i8x32()) }
5038}
5039
5040/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5041///
5042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
5043#[inline]
5044#[target_feature(enable = "avx512bw,avx512vl")]
5045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5046pub fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
5047    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
5048}
5049
5050/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5051///
5052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
5053#[inline]
5054#[target_feature(enable = "avx512bw,avx512vl")]
5055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5056pub fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
5057    unsafe { simd_reduce_max(a.as_i8x16()) }
5058}
5059
5060/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5061///
5062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
5063#[inline]
5064#[target_feature(enable = "avx512bw,avx512vl")]
5065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5066pub fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
5067    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
5068}
5069
5070/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5071///
5072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
5073#[inline]
5074#[target_feature(enable = "avx512bw,avx512vl")]
5075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5076pub fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
5077    unsafe { simd_reduce_max(a.as_u16x16()) }
5078}
5079
5080/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5081///
5082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
5083#[inline]
5084#[target_feature(enable = "avx512bw,avx512vl")]
5085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5086pub fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
5087    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
5088}
5089
5090/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5091///
5092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
5093#[inline]
5094#[target_feature(enable = "avx512bw,avx512vl")]
5095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5096pub fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
5097    unsafe { simd_reduce_max(a.as_u16x8()) }
5098}
5099
5100/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5101///
5102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
5103#[inline]
5104#[target_feature(enable = "avx512bw,avx512vl")]
5105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5106pub fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
5107    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
5108}
5109
5110/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5111///
5112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
5113#[inline]
5114#[target_feature(enable = "avx512bw,avx512vl")]
5115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5116pub fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
5117    unsafe { simd_reduce_max(a.as_u8x32()) }
5118}
5119
5120/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5121///
5122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
5123#[inline]
5124#[target_feature(enable = "avx512bw,avx512vl")]
5125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5126pub fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
5127    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
5128}
5129
5130/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
5133#[inline]
5134#[target_feature(enable = "avx512bw,avx512vl")]
5135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5136pub fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
5137    unsafe { simd_reduce_max(a.as_u8x16()) }
5138}
5139
5140/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5141///
5142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
5143#[inline]
5144#[target_feature(enable = "avx512bw,avx512vl")]
5145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5146pub fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
5147    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
5148}
5149
5150/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5151///
5152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
5153#[inline]
5154#[target_feature(enable = "avx512bw,avx512vl")]
5155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5156pub fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
5157    unsafe { simd_reduce_min(a.as_i16x16()) }
5158}
5159
5160/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5161///
5162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
5163#[inline]
5164#[target_feature(enable = "avx512bw,avx512vl")]
5165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5166pub fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
5167    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
5168}
5169
5170/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5171///
5172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
5173#[inline]
5174#[target_feature(enable = "avx512bw,avx512vl")]
5175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5176pub fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
5177    unsafe { simd_reduce_min(a.as_i16x8()) }
5178}
5179
5180/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5181///
5182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
5183#[inline]
5184#[target_feature(enable = "avx512bw,avx512vl")]
5185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5186pub fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
5187    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
5188}
5189
5190/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5191///
5192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
5193#[inline]
5194#[target_feature(enable = "avx512bw,avx512vl")]
5195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5196pub fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
5197    unsafe { simd_reduce_min(a.as_i8x32()) }
5198}
5199
5200/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5201///
5202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
5203#[inline]
5204#[target_feature(enable = "avx512bw,avx512vl")]
5205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5206pub fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
5207    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
5208}
5209
5210/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5211///
5212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
5213#[inline]
5214#[target_feature(enable = "avx512bw,avx512vl")]
5215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5216pub fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
5217    unsafe { simd_reduce_min(a.as_i8x16()) }
5218}
5219
5220/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5221///
5222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
5223#[inline]
5224#[target_feature(enable = "avx512bw,avx512vl")]
5225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5226pub fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
5227    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
5228}
5229
5230/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5231///
5232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
5233#[inline]
5234#[target_feature(enable = "avx512bw,avx512vl")]
5235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5236pub fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
5237    unsafe { simd_reduce_min(a.as_u16x16()) }
5238}
5239
5240/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
5243#[inline]
5244#[target_feature(enable = "avx512bw,avx512vl")]
5245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5246pub fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
5247    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
5248}
5249
5250/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5251///
5252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
5253#[inline]
5254#[target_feature(enable = "avx512bw,avx512vl")]
5255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5256pub fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
5257    unsafe { simd_reduce_min(a.as_u16x8()) }
5258}
5259
5260/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5261///
5262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
5263#[inline]
5264#[target_feature(enable = "avx512bw,avx512vl")]
5265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5266pub fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
5267    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
5268}
5269
5270/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5271///
5272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
5273#[inline]
5274#[target_feature(enable = "avx512bw,avx512vl")]
5275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5276pub fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
5277    unsafe { simd_reduce_min(a.as_u8x32()) }
5278}
5279
5280/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5281///
5282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
5283#[inline]
5284#[target_feature(enable = "avx512bw,avx512vl")]
5285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5286pub fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
5287    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
5288}
5289
5290/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
5293#[inline]
5294#[target_feature(enable = "avx512bw,avx512vl")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296pub fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
5297    unsafe { simd_reduce_min(a.as_u8x16()) }
5298}
5299
5300/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5301///
5302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
5303#[inline]
5304#[target_feature(enable = "avx512bw,avx512vl")]
5305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5306pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
5307    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
5308}
5309
5310/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5311///
5312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
5313#[inline]
5314#[target_feature(enable = "avx512bw,avx512vl")]
5315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5316pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
5317    unsafe { simd_reduce_mul_unordered(a.as_i16x16()) }
5318}
5319
5320/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
5323#[inline]
5324#[target_feature(enable = "avx512bw,avx512vl")]
5325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5326pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
5327    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) }
5328}
5329
5330/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5331///
5332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
5333#[inline]
5334#[target_feature(enable = "avx512bw,avx512vl")]
5335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5336pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
5337    unsafe { simd_reduce_mul_unordered(a.as_i16x8()) }
5338}
5339
/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
    // Masked-off lanes are replaced with 1, the multiplicative identity, so
    // only the active lanes contribute to the product.
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) }
}
5349
/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
    // Integer multiplication is associative and commutative (wrapping), so an
    // unordered reduction yields the same product regardless of pairing order.
    unsafe { simd_reduce_mul_unordered(a.as_i8x32()) }
}
5359
/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
    // Masked-off lanes are replaced with 1, the multiplicative identity, so
    // only the active lanes contribute to the product.
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) }
}
5369
/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
    // Integer multiplication is associative and commutative (wrapping), so an
    // unordered reduction yields the same product regardless of pairing order.
    unsafe { simd_reduce_mul_unordered(a.as_i8x16()) }
}
5379
/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
    // Masked-off lanes are replaced with 1, the multiplicative identity, so
    // only the active lanes contribute to the product.
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) }
}
5389
/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
    // Bitwise OR is associative and commutative, so reduction order is irrelevant.
    unsafe { simd_reduce_or(a.as_i16x16()) }
}
5399
/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
    // Masked-off lanes are replaced with 0, the identity for bitwise OR.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
}
5409
/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
    // Bitwise OR is associative and commutative, so reduction order is irrelevant.
    unsafe { simd_reduce_or(a.as_i16x8()) }
}
5419
/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
    // Masked-off lanes are replaced with 0, the identity for bitwise OR.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
}
5429
/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
    // Bitwise OR is associative and commutative, so reduction order is irrelevant.
    unsafe { simd_reduce_or(a.as_i8x32()) }
}
5439
/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
    // Masked-off lanes are replaced with 0, the identity for bitwise OR.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
}
5449
/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
    // Bitwise OR is associative and commutative, so reduction order is irrelevant.
    unsafe { simd_reduce_or(a.as_i8x16()) }
}
5459
/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
    // Masked-off lanes are replaced with 0, the identity for bitwise OR.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
}
5469
/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    // A plain unaligned read of the whole vector; the element type only
    // affects the pointer cast, not the bytes copied.
    ptr::read_unaligned(mem_addr as *const __m512i)
}
5480
/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    // A plain unaligned read of the whole vector; the element type only
    // affects the pointer cast, not the bytes copied.
    ptr::read_unaligned(mem_addr as *const __m256i)
}
5491
/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    // A plain unaligned read of the whole vector; the element type only
    // affects the pointer cast, not the bytes copied.
    ptr::read_unaligned(mem_addr as *const __m128i)
}
5502
/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    // A plain unaligned read of the whole vector; the element type only
    // affects the pointer cast, not the bytes copied.
    ptr::read_unaligned(mem_addr as *const __m512i)
}
5513
/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    // A plain unaligned read of the whole vector; the element type only
    // affects the pointer cast, not the bytes copied.
    ptr::read_unaligned(mem_addr as *const __m256i)
}
5524
/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    // A plain unaligned read of the whole vector; the element type only
    // affects the pointer cast, not the bytes copied.
    ptr::read_unaligned(mem_addr as *const __m128i)
}
5535
/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    // A plain unaligned write of the whole vector; the element type only
    // affects the pointer cast, not the bytes written.
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
5546
/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    // A plain unaligned write of the whole vector; the element type only
    // affects the pointer cast, not the bytes written.
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}
5557
/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    // A plain unaligned write of the whole vector; the element type only
    // affects the pointer cast, not the bytes written.
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
5568
/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    // A plain unaligned write of the whole vector; the element type only
    // affects the pointer cast, not the bytes written.
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
5579
/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    // A plain unaligned write of the whole vector; the element type only
    // affects the pointer cast, not the bytes written.
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}
5590
/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    // A plain unaligned write of the whole vector; the element type only
    // affects the pointer cast, not the bytes written.
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
5601
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) means
    // "load this element", 0 means "keep the element from `src`".
    let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
}
5615
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    // Zero-masking is just the writemask form with an all-zeros source vector.
    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}
5628
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) means
    // "load this element", 0 means "keep the element from `src`".
    let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
}
5642
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Zero-masking is just the writemask form with an all-zeros source vector.
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}
5655
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) means
    // "load this element", 0 means "keep the element from `src`".
    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
}
5669
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    // Zero-masking is just the writemask form with an all-zeros source vector.
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}
5682
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) means
    // "load this element", 0 means "keep the element from `src`".
    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
}
5696
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Zero-masking is just the writemask form with an all-zeros source vector.
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}
5709
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) means
    // "load this element", 0 means "keep the element from `src`".
    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
}
5723
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    // Zero-masking is just the writemask form with an all-zeros source vector.
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}
5736
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) means
    // "load this element", 0 means "keep the element from `src`".
    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
}
5750
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Zero-masking is just the writemask form with an all-zeros source vector.
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
5763
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) stores
    // the lane, 0 skips it (the memory location is left untouched).
    let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
}
5776
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) stores
    // the lane, 0 skips it (the memory location is left untouched).
    let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
}
5789
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) stores
    // the lane, 0 skips it (the memory location is left untouched).
    let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
}
5802
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) stores
    // the lane, 0 skips it (the memory location is left untouched).
    let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
}
5815
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) stores
    // the lane, 0 skips it (the memory location is left untouched).
    let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
}
5828
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    // Expand the bitmask into a per-lane vector mask: !0 (all bits set) stores
    // the lane, 0 skips it (the memory location is left untouched).
    let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
}
5841
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    // It's a trick used in the Adler-32 algorithm to perform a widening addition.
    //
    // ```rust
    // #[target_feature(enable = "avx512bw")]
    // unsafe fn widening_add(mad: __m512i) -> __m512i {
    //     _mm512_madd_epi16(mad, _mm512_set1_epi16(1))
    // }
    // ```
    //
    // If we implement this using generic vector intrinsics, the optimizer
    // will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
    // For this reason, we use x86 intrinsics.
    //
    // Calling the LLVM intrinsic directly guarantees a single `vpmaddwd`,
    // which the `assert_instr` codegen test above relies on.
    unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
}
5864
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full vpmaddwd result (16 x i32 lanes), then blend:
        // masked-off 32-bit lanes are taken from `src`.
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
    }
}
5878
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full vpmaddwd result (16 x i32 lanes), then blend:
        // masked-off 32-bit lanes are zeroed.
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
    }
}
5892
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full vpmaddwd result (8 x i32 lanes), then blend:
        // masked-off 32-bit lanes are taken from `src`.
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
    }
}
5906
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full vpmaddwd result (8 x i32 lanes), then blend:
        // masked-off 32-bit lanes are zeroed.
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
    }
}
5920
5921/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5922///
5923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
5924#[inline]
5925#[target_feature(enable = "avx512bw,avx512vl")]
5926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5927#[cfg_attr(test, assert_instr(vpmaddwd))]
5928pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
5929    unsafe {
5930        let madd = _mm_madd_epi16(a, b).as_i32x4();
5931        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
5932    }
5933}
5934
5935/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5936///
5937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
5938#[inline]
5939#[target_feature(enable = "avx512bw,avx512vl")]
5940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5941#[cfg_attr(test, assert_instr(vpmaddwd))]
5942pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
5943    unsafe {
5944        let madd = _mm_madd_epi16(a, b).as_i32x4();
5945        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
5946    }
5947}
5948
5949/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
5950///
5951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
5952#[inline]
5953#[target_feature(enable = "avx512bw")]
5954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5955#[cfg_attr(test, assert_instr(vpmaddubsw))]
5956pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
5957    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
5958}
5959
5960/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5961///
5962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
5963#[inline]
5964#[target_feature(enable = "avx512bw")]
5965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5966#[cfg_attr(test, assert_instr(vpmaddubsw))]
5967pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
5968    unsafe {
5969        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
5970        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
5971    }
5972}
5973
5974/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5975///
5976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
5977#[inline]
5978#[target_feature(enable = "avx512bw")]
5979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5980#[cfg_attr(test, assert_instr(vpmaddubsw))]
5981pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
5982    unsafe {
5983        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
5984        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
5985    }
5986}
5987
5988/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5989///
5990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
5991#[inline]
5992#[target_feature(enable = "avx512bw,avx512vl")]
5993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5994#[cfg_attr(test, assert_instr(vpmaddubsw))]
5995pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
5996    unsafe {
5997        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
5998        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
5999    }
6000}
6001
6002/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6003///
6004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
6005#[inline]
6006#[target_feature(enable = "avx512bw,avx512vl")]
6007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6008#[cfg_attr(test, assert_instr(vpmaddubsw))]
6009pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6010    unsafe {
6011        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6012        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
6013    }
6014}
6015
6016/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6017///
6018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
6019#[inline]
6020#[target_feature(enable = "avx512bw,avx512vl")]
6021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6022#[cfg_attr(test, assert_instr(vpmaddubsw))]
6023pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6024    unsafe {
6025        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6026        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
6027    }
6028}
6029
6030/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6031///
6032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
6033#[inline]
6034#[target_feature(enable = "avx512bw,avx512vl")]
6035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6036#[cfg_attr(test, assert_instr(vpmaddubsw))]
6037pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6038    unsafe {
6039        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6040        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
6041    }
6042}
6043
6044/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
6047#[inline]
6048#[target_feature(enable = "avx512bw")]
6049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6050#[cfg_attr(test, assert_instr(vpackssdw))]
6051pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
6052    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
6053}
6054
6055/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
6058#[inline]
6059#[target_feature(enable = "avx512bw")]
6060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6061#[cfg_attr(test, assert_instr(vpackssdw))]
6062pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6063    unsafe {
6064        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6065        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6066    }
6067}
6068
6069/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6070///
6071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
6072#[inline]
6073#[target_feature(enable = "avx512bw")]
6074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6075#[cfg_attr(test, assert_instr(vpackssdw))]
6076pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6077    unsafe {
6078        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6079        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6080    }
6081}
6082
6083/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6084///
6085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
6086#[inline]
6087#[target_feature(enable = "avx512bw,avx512vl")]
6088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6089#[cfg_attr(test, assert_instr(vpackssdw))]
6090pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6091    unsafe {
6092        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6093        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6094    }
6095}
6096
6097/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6098///
6099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
6100#[inline]
6101#[target_feature(enable = "avx512bw,avx512vl")]
6102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6103#[cfg_attr(test, assert_instr(vpackssdw))]
6104pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6105    unsafe {
6106        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6107        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6108    }
6109}
6110
6111/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6112///
6113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
6114#[inline]
6115#[target_feature(enable = "avx512bw,avx512vl")]
6116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6117#[cfg_attr(test, assert_instr(vpackssdw))]
6118pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6119    unsafe {
6120        let pack = _mm_packs_epi32(a, b).as_i16x8();
6121        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6122    }
6123}
6124
6125/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6126///
6127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
6128#[inline]
6129#[target_feature(enable = "avx512bw,avx512vl")]
6130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6131#[cfg_attr(test, assert_instr(vpackssdw))]
6132pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6133    unsafe {
6134        let pack = _mm_packs_epi32(a, b).as_i16x8();
6135        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6136    }
6137}
6138
6139/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
6140///
6141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
6142#[inline]
6143#[target_feature(enable = "avx512bw")]
6144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6145#[cfg_attr(test, assert_instr(vpacksswb))]
6146pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
6147    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
6148}
6149
6150/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6151///
6152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
6153#[inline]
6154#[target_feature(enable = "avx512bw")]
6155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6156#[cfg_attr(test, assert_instr(vpacksswb))]
6157pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6158    unsafe {
6159        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6160        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6161    }
6162}
6163
6164/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6165///
6166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
6167#[inline]
6168#[target_feature(enable = "avx512bw")]
6169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6170#[cfg_attr(test, assert_instr(vpacksswb))]
6171pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6172    unsafe {
6173        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6174        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6175    }
6176}
6177
6178/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6179///
6180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
6181#[inline]
6182#[target_feature(enable = "avx512bw,avx512vl")]
6183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6184#[cfg_attr(test, assert_instr(vpacksswb))]
6185pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6186    unsafe {
6187        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6188        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6189    }
6190}
6191
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi16&expand=4078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}
6205
6206/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6207///
6208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
6209#[inline]
6210#[target_feature(enable = "avx512bw,avx512vl")]
6211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6212#[cfg_attr(test, assert_instr(vpacksswb))]
6213pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6214    unsafe {
6215        let pack = _mm_packs_epi16(a, b).as_i8x16();
6216        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6217    }
6218}
6219
6220/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6221///
6222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
6223#[inline]
6224#[target_feature(enable = "avx512bw,avx512vl")]
6225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6226#[cfg_attr(test, assert_instr(vpacksswb))]
6227pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6228    unsafe {
6229        let pack = _mm_packs_epi16(a, b).as_i8x16();
6230        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6231    }
6232}
6233
6234/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
6235///
6236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
6237#[inline]
6238#[target_feature(enable = "avx512bw")]
6239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6240#[cfg_attr(test, assert_instr(vpackusdw))]
6241pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
6242    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
6243}
6244
6245/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6246///
6247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
6248#[inline]
6249#[target_feature(enable = "avx512bw")]
6250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6251#[cfg_attr(test, assert_instr(vpackusdw))]
6252pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6253    unsafe {
6254        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6255        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6256    }
6257}
6258
6259/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6260///
6261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
6262#[inline]
6263#[target_feature(enable = "avx512bw")]
6264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6265#[cfg_attr(test, assert_instr(vpackusdw))]
6266pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6267    unsafe {
6268        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6269        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6270    }
6271}
6272
6273/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6274///
6275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
6276#[inline]
6277#[target_feature(enable = "avx512bw,avx512vl")]
6278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6279#[cfg_attr(test, assert_instr(vpackusdw))]
6280pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6281    unsafe {
6282        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6283        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6284    }
6285}
6286
6287/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6288///
6289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
6290#[inline]
6291#[target_feature(enable = "avx512bw,avx512vl")]
6292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6293#[cfg_attr(test, assert_instr(vpackusdw))]
6294pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6295    unsafe {
6296        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6297        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6298    }
6299}
6300
6301/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6302///
6303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
6304#[inline]
6305#[target_feature(enable = "avx512bw,avx512vl")]
6306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6307#[cfg_attr(test, assert_instr(vpackusdw))]
6308pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6309    unsafe {
6310        let pack = _mm_packus_epi32(a, b).as_i16x8();
6311        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6312    }
6313}
6314
6315/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6316///
6317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
6318#[inline]
6319#[target_feature(enable = "avx512bw,avx512vl")]
6320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6321#[cfg_attr(test, assert_instr(vpackusdw))]
6322pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6323    unsafe {
6324        let pack = _mm_packus_epi32(a, b).as_i16x8();
6325        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6326    }
6327}
6328
6329/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
6330///
6331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
6332#[inline]
6333#[target_feature(enable = "avx512bw")]
6334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6335#[cfg_attr(test, assert_instr(vpackuswb))]
6336pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
6337    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
6338}
6339
6340/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6341///
6342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
6343#[inline]
6344#[target_feature(enable = "avx512bw")]
6345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6346#[cfg_attr(test, assert_instr(vpackuswb))]
6347pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6348    unsafe {
6349        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6350        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6351    }
6352}
6353
6354/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6355///
6356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
6357#[inline]
6358#[target_feature(enable = "avx512bw")]
6359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6360#[cfg_attr(test, assert_instr(vpackuswb))]
6361pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6362    unsafe {
6363        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6364        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6365    }
6366}
6367
6368/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6369///
6370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
6371#[inline]
6372#[target_feature(enable = "avx512bw,avx512vl")]
6373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6374#[cfg_attr(test, assert_instr(vpackuswb))]
6375pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6376    unsafe {
6377        let pack = _mm256_packus_epi16(a, b).as_i8x32();
6378        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6379    }
6380}
6381
6382/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6383///
6384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
6385#[inline]
6386#[target_feature(enable = "avx512bw,avx512vl")]
6387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6388#[cfg_attr(test, assert_instr(vpackuswb))]
6389pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6390    unsafe {
6391        let pack = _mm256_packus_epi16(a, b).as_i8x32();
6392        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
6393    }
6394}
6395
6396/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6397///
6398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
6399#[inline]
6400#[target_feature(enable = "avx512bw,avx512vl")]
6401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6402#[cfg_attr(test, assert_instr(vpackuswb))]
6403pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6404    unsafe {
6405        let pack = _mm_packus_epi16(a, b).as_i8x16();
6406        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6407    }
6408}
6409
6410/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6411///
6412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
6413#[inline]
6414#[target_feature(enable = "avx512bw,avx512vl")]
6415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6416#[cfg_attr(test, assert_instr(vpackuswb))]
6417pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6418    unsafe {
6419        let pack = _mm_packus_epi16(a, b).as_i8x16();
6420        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6421    }
6422}
6423
6424/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
6425///
6426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
6427#[inline]
6428#[target_feature(enable = "avx512bw")]
6429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6430#[cfg_attr(test, assert_instr(vpavgw))]
6431pub fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
6432    unsafe {
6433        let a = simd_cast::<_, u32x32>(a.as_u16x32());
6434        let b = simd_cast::<_, u32x32>(b.as_u16x32());
6435        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
6436        transmute(simd_cast::<_, u16x32>(r))
6437    }
6438}
6439
6440/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6441///
6442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
6443#[inline]
6444#[target_feature(enable = "avx512bw")]
6445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6446#[cfg_attr(test, assert_instr(vpavgw))]
6447pub fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6448    unsafe {
6449        let avg = _mm512_avg_epu16(a, b).as_u16x32();
6450        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
6451    }
6452}
6453
6454/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6455///
6456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
6457#[inline]
6458#[target_feature(enable = "avx512bw")]
6459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6460#[cfg_attr(test, assert_instr(vpavgw))]
6461pub fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6462    unsafe {
6463        let avg = _mm512_avg_epu16(a, b).as_u16x32();
6464        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
6465    }
6466}
6467
6468/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6469///
6470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
6471#[inline]
6472#[target_feature(enable = "avx512bw,avx512vl")]
6473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6474#[cfg_attr(test, assert_instr(vpavgw))]
6475pub fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6476    unsafe {
6477        let avg = _mm256_avg_epu16(a, b).as_u16x16();
6478        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
6479    }
6480}
6481
6482/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6483///
6484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
6485#[inline]
6486#[target_feature(enable = "avx512bw,avx512vl")]
6487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6488#[cfg_attr(test, assert_instr(vpavgw))]
6489pub fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6490    unsafe {
6491        let avg = _mm256_avg_epu16(a, b).as_u16x16();
6492        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
6493    }
6494}
6495
6496/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6497///
6498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
6499#[inline]
6500#[target_feature(enable = "avx512bw,avx512vl")]
6501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6502#[cfg_attr(test, assert_instr(vpavgw))]
6503pub fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6504    unsafe {
6505        let avg = _mm_avg_epu16(a, b).as_u16x8();
6506        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
6507    }
6508}
6509
6510/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6511///
6512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
6513#[inline]
6514#[target_feature(enable = "avx512bw,avx512vl")]
6515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6516#[cfg_attr(test, assert_instr(vpavgw))]
6517pub fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6518    unsafe {
6519        let avg = _mm_avg_epu16(a, b).as_u16x8();
6520        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
6521    }
6522}
6523
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Widen every u8 lane to u16 so the sum `a + b + 1` cannot overflow.
        let a = simd_cast::<_, u16x64>(a.as_u8x64());
        let b = simd_cast::<_, u16x64>(b.as_u8x64());
        // Rounding average: (a + b + 1) >> 1, matching the `vpavgb` semantics.
        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
        // Narrow back to u8 lanes; the averaged value always fits in 8 bits.
        transmute(simd_cast::<_, u8x64>(r))
    }
}
6539
6540/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6541///
6542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
6543#[inline]
6544#[target_feature(enable = "avx512bw")]
6545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6546#[cfg_attr(test, assert_instr(vpavgb))]
6547pub fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6548    unsafe {
6549        let avg = _mm512_avg_epu8(a, b).as_u8x64();
6550        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
6551    }
6552}
6553
6554/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6555///
6556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
6557#[inline]
6558#[target_feature(enable = "avx512bw")]
6559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6560#[cfg_attr(test, assert_instr(vpavgb))]
6561pub fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6562    unsafe {
6563        let avg = _mm512_avg_epu8(a, b).as_u8x64();
6564        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
6565    }
6566}
6567
6568/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6569///
6570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
6571#[inline]
6572#[target_feature(enable = "avx512bw,avx512vl")]
6573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6574#[cfg_attr(test, assert_instr(vpavgb))]
6575pub fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6576    unsafe {
6577        let avg = _mm256_avg_epu8(a, b).as_u8x32();
6578        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
6579    }
6580}
6581
6582/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6583///
6584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
6585#[inline]
6586#[target_feature(enable = "avx512bw,avx512vl")]
6587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6588#[cfg_attr(test, assert_instr(vpavgb))]
6589pub fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6590    unsafe {
6591        let avg = _mm256_avg_epu8(a, b).as_u8x32();
6592        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
6593    }
6594}
6595
6596/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6597///
6598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
6599#[inline]
6600#[target_feature(enable = "avx512bw,avx512vl")]
6601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6602#[cfg_attr(test, assert_instr(vpavgb))]
6603pub fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6604    unsafe {
6605        let avg = _mm_avg_epu8(a, b).as_u8x16();
6606        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
6607    }
6608}
6609
6610/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
6613#[inline]
6614#[target_feature(enable = "avx512bw,avx512vl")]
6615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6616#[cfg_attr(test, assert_instr(vpavgb))]
6617pub fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6618    unsafe {
6619        let avg = _mm_avg_epu8(a, b).as_u8x16();
6620        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
6621    }
6622}
6623
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Delegates to the `vpsllw` LLVM intrinsic: all lanes are shifted by the
    // same count taken from the low quadword of `count`. Per the Intel docs,
    // counts of 16 or more zero the entire result.
    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
}
6634
6635/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6636///
6637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
6638#[inline]
6639#[target_feature(enable = "avx512bw")]
6640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6641#[cfg_attr(test, assert_instr(vpsllw))]
6642pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
6643    unsafe {
6644        let shf = _mm512_sll_epi16(a, count).as_i16x32();
6645        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
6646    }
6647}
6648
6649/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6650///
6651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
6652#[inline]
6653#[target_feature(enable = "avx512bw")]
6654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6655#[cfg_attr(test, assert_instr(vpsllw))]
6656pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
6657    unsafe {
6658        let shf = _mm512_sll_epi16(a, count).as_i16x32();
6659        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
6660    }
6661}
6662
6663/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6664///
6665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
6666#[inline]
6667#[target_feature(enable = "avx512bw,avx512vl")]
6668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6669#[cfg_attr(test, assert_instr(vpsllw))]
6670pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
6671    unsafe {
6672        let shf = _mm256_sll_epi16(a, count).as_i16x16();
6673        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
6674    }
6675}
6676
6677/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
6680#[inline]
6681#[target_feature(enable = "avx512bw,avx512vl")]
6682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6683#[cfg_attr(test, assert_instr(vpsllw))]
6684pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
6685    unsafe {
6686        let shf = _mm256_sll_epi16(a, count).as_i16x16();
6687        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
6688    }
6689}
6690
6691/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6692///
6693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
6694#[inline]
6695#[target_feature(enable = "avx512bw,avx512vl")]
6696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6697#[cfg_attr(test, assert_instr(vpsllw))]
6698pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
6699    unsafe {
6700        let shf = _mm_sll_epi16(a, count).as_i16x8();
6701        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
6702    }
6703}
6704
6705/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6706///
6707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
6708#[inline]
6709#[target_feature(enable = "avx512bw,avx512vl")]
6710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6711#[cfg_attr(test, assert_instr(vpsllw))]
6712pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
6713    unsafe {
6714        let shf = _mm_sll_epi16(a, count).as_i16x8();
6715        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
6716    }
6717}
6718
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        // The immediate may be any 8-bit value.
        static_assert_uimm_bits!(IMM8, 8);
        // Shifting a 16-bit lane by 16 or more always produces zero; guard
        // explicitly because `simd_shl` with an out-of-range count is UB.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}
6737
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_slli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more zero every lane; handled explicitly because
        // `simd_shl` with an out-of-range count is UB.
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        // Merge: shifted value where `k` is set, `src` lane otherwise.
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}
6757
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more zero every lane, so the zero-masked result is
        // all-zero regardless of `k`; this also avoids UB in `simd_shl`.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            // Zero the lanes whose mask bit is clear.
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}
6777
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_slli_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more zero every lane; handled explicitly because
        // `simd_shl` with an out-of-range count is UB.
        let shf = if IMM8 >= 16 {
            u16x16::ZERO
        } else {
            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
        };
        // Merge: shifted value where `k` is set, `src` lane otherwise.
        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
    }
}
6797
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more zero every lane, so the zero-masked result is
        // all-zero regardless of `k`; this also avoids UB in `simd_shl`.
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
            // Zero the lanes whose mask bit is clear.
            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
        }
    }
}
6817
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_slli_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more zero every lane; handled explicitly because
        // `simd_shl` with an out-of-range count is UB.
        let shf = if IMM8 >= 16 {
            u16x8::ZERO
        } else {
            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
        };
        // Merge: shifted value where `k` is set, `src` lane otherwise.
        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
    }
}
6837
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more zero every lane, so the zero-masked result is
        // all-zero regardless of `k`; this also avoids UB in `simd_shl`.
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
            // Zero the lanes whose mask bit is clear.
            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
        }
    }
}
6857
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Per-lane counts >= 16 must produce zero (vpsllvw semantics), but
        // they are UB for `simd_shl`, so find the in-range lanes first...
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        // ...clamp out-of-range counts to 0 so the shift itself is defined...
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        // ...then zero the lanes whose original count was out of range.
        simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
6873
6874/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6875///
6876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
6877#[inline]
6878#[target_feature(enable = "avx512bw")]
6879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6880#[cfg_attr(test, assert_instr(vpsllvw))]
6881pub fn _mm512_mask_sllv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
6882    unsafe {
6883        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
6884        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
6885    }
6886}
6887
6888/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6889///
6890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
6891#[inline]
6892#[target_feature(enable = "avx512bw")]
6893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6894#[cfg_attr(test, assert_instr(vpsllvw))]
6895pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
6896    unsafe {
6897        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
6898        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
6899    }
6900}
6901
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Per-lane counts >= 16 must produce zero (vpsllvw semantics), but
        // they are UB for `simd_shl`, so find the in-range lanes first...
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        // ...clamp out-of-range counts to 0 so the shift itself is defined...
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        // ...then zero the lanes whose original count was out of range.
        simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
6917
6918/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6919///
6920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
6921#[inline]
6922#[target_feature(enable = "avx512bw,avx512vl")]
6923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6924#[cfg_attr(test, assert_instr(vpsllvw))]
6925pub fn _mm256_mask_sllv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
6926    unsafe {
6927        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
6928        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
6929    }
6930}
6931
6932/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6933///
6934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
6935#[inline]
6936#[target_feature(enable = "avx512bw,avx512vl")]
6937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6938#[cfg_attr(test, assert_instr(vpsllvw))]
6939pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
6940    unsafe {
6941        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
6942        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
6943    }
6944}
6945
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Per-lane counts >= 16 must produce zero (vpsllvw semantics), but
        // they are UB for `simd_shl`, so find the in-range lanes first...
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        // ...clamp out-of-range counts to 0 so the shift itself is defined...
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        // ...then zero the lanes whose original count was out of range.
        simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
6961
6962/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6963///
6964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
6965#[inline]
6966#[target_feature(enable = "avx512bw,avx512vl")]
6967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6968#[cfg_attr(test, assert_instr(vpsllvw))]
6969pub fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
6970    unsafe {
6971        let shf = _mm_sllv_epi16(a, count).as_i16x8();
6972        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
6973    }
6974}
6975
6976/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6977///
6978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
6979#[inline]
6980#[target_feature(enable = "avx512bw,avx512vl")]
6981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6982#[cfg_attr(test, assert_instr(vpsllvw))]
6983pub fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
6984    unsafe {
6985        let shf = _mm_sllv_epi16(a, count).as_i16x8();
6986        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
6987    }
6988}
6989
/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Delegates to the `vpsrlw` LLVM intrinsic: all lanes are shifted by the
    // same count taken from the low quadword of `count`. Per the Intel docs,
    // counts of 16 or more zero the entire result.
    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
}
7000
7001/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7002///
7003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
7004#[inline]
7005#[target_feature(enable = "avx512bw")]
7006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7007#[cfg_attr(test, assert_instr(vpsrlw))]
7008pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7009    unsafe {
7010        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7011        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7012    }
7013}
7014
7015/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7016///
7017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
7018#[inline]
7019#[target_feature(enable = "avx512bw")]
7020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7021#[cfg_attr(test, assert_instr(vpsrlw))]
7022pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7023    unsafe {
7024        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7025        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7026    }
7027}
7028
7029/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7030///
7031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
7032#[inline]
7033#[target_feature(enable = "avx512bw,avx512vl")]
7034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7035#[cfg_attr(test, assert_instr(vpsrlw))]
7036pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7037    unsafe {
7038        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7039        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7040    }
7041}
7042
/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        // Unmasked shift first, then zero every lane whose mask bit is clear.
        let shf = _mm256_srl_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}
7056
/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Unmasked shift, then per-lane select: shifted value where the mask
        // bit is set, the corresponding lane of `src` where it is clear.
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}
7070
/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Unmasked shift first, then zero every lane whose mask bit is clear.
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
7084
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // A logical shift by >= the element width (16) zeroes the result, and
        // `simd_shr` with such a count would be UB — handle it explicitly.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}
7103
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Counts >= 16 zero the lane (and would be UB in `simd_shr`), so
        // special-case them before applying the writemask.
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        // Mask bit set -> shifted lane; clear -> corresponding lane of `src`.
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}
7123
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // NOTE: `IMM8` is `i32` here, unlike the `u32` used by the sibling
        // `_mm512_srli_epi16` / `_mm512_mask_srli_epi16`. This is a published,
        // stable signature and cannot be changed; the static assert above
        // still restricts the value to 0..=255, so `IMM8 as u16` is lossless.
        if IMM8 >= 16 {
            // Counts >= 16 zero every lane regardless of the mask.
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}
7144
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srli_epi16<const IMM8: i32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Delegate to the unmasked intrinsic (which handles counts >= 16),
        // then blend with `src` per the writemask.
        let shf = _mm256_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
    }
}
7160
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Delegate to the unmasked intrinsic (which handles counts >= 16),
        // then zero the lanes whose mask bit is clear.
        let shf = _mm256_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
    }
}
7176
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srli_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Delegate to the unmasked intrinsic (which handles counts >= 16),
        // then blend with `src` per the writemask.
        let shf = _mm_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
    }
}
7192
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Delegate to the unmasked intrinsic (which handles counts >= 16),
        // then zero the lanes whose mask bit is clear.
        let shf = _mm_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
    }
}
7208
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Lanes with a shift count >= 16 must produce 0. Clamp those counts to
        // 0 so `simd_shr` itself is never called with an out-of-range (UB)
        // count, then force the affected lanes to zero in the final select.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
7224
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_mask_srlv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Unmasked variable shift, then blend with `src` per the writemask.
        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}
7238
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Unmasked variable shift, then zero lanes whose mask bit is clear.
        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}
7252
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Lanes with a shift count >= 16 must produce 0. Clamp those counts to
        // 0 so `simd_shr` is never called with an out-of-range (UB) count,
        // then force the affected lanes to zero in the final select.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
7268
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_mask_srlv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        // Unmasked variable shift, then blend with `src` per the writemask.
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}
7282
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        // Unmasked variable shift, then zero lanes whose mask bit is clear.
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}
7296
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Lanes with a shift count >= 16 must produce 0. Clamp those counts to
        // 0 so `simd_shr` is never called with an out-of-range (UB) count,
        // then force the affected lanes to zero in the final select.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
7312
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Unmasked variable shift, then blend with `src` per the writemask.
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}
7326
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Unmasked variable shift, then zero lanes whose mask bit is clear.
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
7340
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Delegates to the `vpsraw` LLVM intrinsic, which carries the full
    // hardware semantics for the xmm-register shift count (unlike `simd_shr`,
    // which would be UB for counts >= 16).
    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
}
7351
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        // Unmasked arithmetic shift, then blend with `src` per the writemask.
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}
7365
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        // Unmasked arithmetic shift, then zero lanes whose mask bit is clear.
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}
7379
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        // Unmasked arithmetic shift, then blend with `src` per the writemask.
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}
7393
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        // Unmasked arithmetic shift, then zero lanes whose mask bit is clear.
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}
7407
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Unmasked arithmetic shift, then blend with `src` per the writemask.
        let shf = _mm_sra_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}
7421
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Unmasked arithmetic shift, then zero lanes whose mask bit is clear.
        let shf = _mm_sra_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
7435
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Arithmetic shifts saturate the count at 15: any count >= 16 yields
        // all sign bits, which a shift by 15 already produces. The clamp also
        // keeps `simd_shr` within its defined range.
        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
    }
}
7450
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srai_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Count is clamped to 15 (a shift by 15 already yields all sign bits);
        // then blend with `src` per the writemask.
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}
7466
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Count is clamped to 15 (a shift by 15 already yields all sign bits);
        // then zero the lanes whose mask bit is clear.
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}
7482
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srai_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Count is clamped to 15 (a shift by 15 already yields all sign bits);
        // then blend with `src` per the writemask.
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}
7498
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Count is clamped to 15 (a shift by 15 already yields all sign bits);
        // then zero the lanes whose mask bit is clear.
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}
7514
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srai_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Count is clamped to 15 (a shift by 15 already yields all sign bits);
        // then blend with `src` per the writemask.
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}
7530
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Count is clamped to 15 (a shift by 15 already yields all sign bits);
        // then zero the lanes whose mask bit is clear.
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}
7546
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Per-lane counts >= 16 are clamped to 15: an arithmetic shift by 15
        // already broadcasts the sign bit across the lane, and the clamp keeps
        // `simd_shr` within its defined (non-UB) range.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
        simd_shr(a.as_i16x32(), count).as_m512i()
    }
}
7562
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_mask_srav_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Unmasked variable shift, then blend with `src` per the writemask.
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}
7576
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Unmasked variable shift, then zero lanes whose mask bit is clear.
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}
7590
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Per-lane counts >= 16 are clamped to 15: an arithmetic shift by 15
        // already broadcasts the sign bit across the lane, and the clamp keeps
        // `simd_shr` within its defined (non-UB) range.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
        simd_shr(a.as_i16x16(), count).as_m256i()
    }
}
7606
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_mask_srav_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        // Unmasked variable shift, then blend with `src` per the writemask.
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}
7620
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        // Unmasked variable shift, then zero lanes whose mask bit is clear.
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}
7634
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Lanes shifting by >= 16 are replaced by a shift of 15 so the whole
        // element is filled with the sign bit (vpsravw's saturating behavior)
        // and `simd_shr` never sees an out-of-range shift amount.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
        simd_shr(a.as_i16x8(), count).as_m128i()
    }
}
7650
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi16(a, count).as_i16x8();
        // Lane i is the shifted value when bit i of `k` is set, else `src[i]`.
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}
7664
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi16(a, count).as_i16x8();
        // Lane i is the shifted value when bit i of `k` is set, else zero.
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
7678
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    // Delegates to the vpermi2w intrinsic; `idx` selects elements from `a`/`b`.
    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
}
7689
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm512_mask_permutex2var_epi16(
    a: __m512i,
    k: __mmask32,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        // Masked-off lanes fall back to `a` (vpermt2w overwrites the data register).
        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
    }
}
7708
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm512_maskz_permutex2var_epi16(
    k: __mmask32,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}
7727
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm512_mask2_permutex2var_epi16(
    a: __m512i,
    idx: __m512i,
    k: __mmask32,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        // Masked-off lanes fall back to `idx` (vpermi2w overwrites the index register).
        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
    }
}
7746
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    // Delegates to the 256-bit vpermi2w intrinsic; `idx` selects from `a`/`b`.
    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
}
7757
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm256_mask_permutex2var_epi16(
    a: __m256i,
    k: __mmask16,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        // Masked-off lanes fall back to `a` (vpermt2w overwrites the data register).
        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
    }
}
7776
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm256_maskz_permutex2var_epi16(
    k: __mmask16,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}
7795
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm256_mask2_permutex2var_epi16(
    a: __m256i,
    idx: __m256i,
    k: __mmask16,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        // Masked-off lanes fall back to `idx` (vpermi2w overwrites the index register).
        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
    }
}
7814
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    // Delegates to the 128-bit vpermi2w intrinsic; `idx` selects from `a`/`b`.
    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
}
7825
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        // Masked-off lanes fall back to `a` (vpermt2w overwrites the data register).
        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
    }
}
7839
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}
7853
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        // Masked-off lanes fall back to `idx` (vpermi2w overwrites the index register).
        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
    }
}
7867
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    // NB: the underlying intrinsic takes (data, indices), the reverse of the
    // public (idx, a) argument order.
    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
}
7878
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_mask_permutexvar_epi16(
    src: __m512i,
    k: __mmask32,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        // Masked-off lanes are copied from `src`.
        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
    }
}
7897
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}
7911
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    // NB: the underlying intrinsic takes (data, indices), the reverse of the
    // public (idx, a) argument order.
    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
}
7922
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_mask_permutexvar_epi16(
    src: __m256i,
    k: __mmask16,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        // Masked-off lanes are copied from `src`.
        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
    }
}
7941
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}
7955
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
    // NB: the underlying intrinsic takes (data, indices), the reverse of the
    // public (idx, a) argument order.
    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
}
7966
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        // Masked-off lanes are copied from `src`.
        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
    }
}
7980
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}
7994
/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    // Lane i comes from `b` when bit i of `k` is set, else from `a`.
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
}
8005
/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    // Lane i comes from `b` when bit i of `k` is set, else from `a`.
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
}
8016
/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // Lane i comes from `b` when bit i of `k` is set, else from `a`.
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
}
8027
/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    // Lane i comes from `b` when bit i of `k` is set, else from `a`.
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
}
8038
/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    // Lane i comes from `b` when bit i of `k` is set, else from `a`.
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
}
8049
/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    // Lane i comes from `b` when bit i of `k` is set, else from `a`.
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
}
8060
/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
    unsafe {
        // Widen `a` to 512 bits, then replicate element 0 into all 32 lanes
        // with an all-zero shuffle mask; only lane 0 of the widened vector is
        // read, so the contents of the upper lanes are irrelevant.
        let a = _mm512_castsi128_si512(a).as_i16x32();
        let ret: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}
8082
/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        // Masked-off lanes are copied from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
    }
}
8096
/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
    }
}
8110
/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        // Reuses the AVX2 broadcast; masking is applied on top.
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
    }
}
8124
/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
    }
}
8138
/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Reuses the AVX2 broadcast; masking is applied on top.
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
    }
}
8152
/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
    }
}
8166
/// Broadcast the low packed 8-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
    unsafe {
        // Widen `a` to 512 bits, then replicate element 0 into all 64 lanes
        // with an all-zero shuffle mask; only lane 0 of the widened vector is
        // read, so the contents of the upper lanes are irrelevant.
        let a = _mm512_castsi128_si512(a).as_i8x64();
        let ret: i8x64 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}
8189
/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        // Masked-off lanes are copied from `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
    }
}
8203
/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
    }
}
8217
/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        // Reuses the AVX2 broadcast; masking is applied on top.
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
    }
}
8231
/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        // Masked-off lanes are zeroed.
        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
    }
}
8245
8246/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8247///
8248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
8249#[inline]
8250#[target_feature(enable = "avx512bw,avx512vl")]
8251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8252#[cfg_attr(test, assert_instr(vpbroadcastb))]
8253pub fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
8254    unsafe {
8255        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8256        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
8257    }
8258}
8259
8260/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8261///
8262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
8263#[inline]
8264#[target_feature(enable = "avx512bw,avx512vl")]
8265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8266#[cfg_attr(test, assert_instr(vpbroadcastb))]
8267pub fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
8268    unsafe {
8269        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8270        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
8271    }
8272}
8273
/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // simd_shuffle! concatenates the inputs: indices 0..=31 select from `a`,
        // 32..=63 (written `32 + i`) select from `b`. Each pair of rows below
        // covers one 128-bit lane (8 words per lane): the upper four words of
        // the lane from `a` are interleaved with the matching words from `b`,
        // reproducing VPUNPCKHWD's lane-wise high-half interleave.
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                4, 32 + 4, 5, 32 + 5,
                6, 32 + 6, 7, 32 + 7,
                12, 32 + 12, 13, 32 + 13,
                14, 32 + 14, 15, 32 + 15,
                20, 32 + 20, 21, 32 + 21,
                22, 32 + 22, 23, 32 + 23,
                28, 32 + 28, 29, 32 + 29,
                30, 32 + 30, 31, 32 + 31,
            ],
        );
        transmute(r)
    }
}
8303
8304/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8305///
8306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
8307#[inline]
8308#[target_feature(enable = "avx512bw")]
8309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8310#[cfg_attr(test, assert_instr(vpunpckhwd))]
8311pub fn _mm512_mask_unpackhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8312    unsafe {
8313        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8314        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
8315    }
8316}
8317
8318/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8319///
8320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
8321#[inline]
8322#[target_feature(enable = "avx512bw")]
8323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8324#[cfg_attr(test, assert_instr(vpunpckhwd))]
8325pub fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8326    unsafe {
8327        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8328        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
8329    }
8330}
8331
8332/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8333///
8334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
8335#[inline]
8336#[target_feature(enable = "avx512bw,avx512vl")]
8337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8338#[cfg_attr(test, assert_instr(vpunpckhwd))]
8339pub fn _mm256_mask_unpackhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8340    unsafe {
8341        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8342        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
8343    }
8344}
8345
8346/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8347///
8348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
8349#[inline]
8350#[target_feature(enable = "avx512bw,avx512vl")]
8351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8352#[cfg_attr(test, assert_instr(vpunpckhwd))]
8353pub fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8354    unsafe {
8355        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8356        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
8357    }
8358}
8359
8360/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8361///
8362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
8363#[inline]
8364#[target_feature(enable = "avx512bw,avx512vl")]
8365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8366#[cfg_attr(test, assert_instr(vpunpckhwd))]
8367pub fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8368    unsafe {
8369        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
8370        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
8371    }
8372}
8373
8374/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8375///
8376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
8377#[inline]
8378#[target_feature(enable = "avx512bw,avx512vl")]
8379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8380#[cfg_attr(test, assert_instr(vpunpckhwd))]
8381pub fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8382    unsafe {
8383        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
8384        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
8385    }
8386}
8387
/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // simd_shuffle! concatenates the inputs: indices 0..=63 select from `a`,
        // 64..=127 (written `64 + i`) select from `b`. Each group of four rows
        // below covers one 128-bit lane (16 bytes per lane): the upper eight
        // bytes of the lane from `a` are interleaved with the matching bytes
        // from `b`, reproducing VPUNPCKHBW's lane-wise high-half interleave.
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                8, 64 + 8, 9, 64 + 9,
                10, 64 + 10, 11, 64 + 11,
                12, 64 + 12, 13, 64 + 13,
                14, 64 + 14, 15, 64 + 15,
                24, 64 + 24, 25, 64 + 25,
                26, 64 + 26, 27, 64 + 27,
                28, 64 + 28, 29, 64 + 29,
                30, 64 + 30, 31, 64 + 31,
                40, 64 + 40, 41, 64 + 41,
                42, 64 + 42, 43, 64 + 43,
                44, 64 + 44, 45, 64 + 45,
                46, 64 + 46, 47, 64 + 47,
                56, 64 + 56, 57, 64 + 57,
                58, 64 + 58, 59, 64 + 59,
                60, 64 + 60, 61, 64 + 61,
                62, 64 + 62, 63, 64 + 63,
            ],
        );
        transmute(r)
    }
}
8425
8426/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8427///
8428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
8429#[inline]
8430#[target_feature(enable = "avx512bw")]
8431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8432#[cfg_attr(test, assert_instr(vpunpckhbw))]
8433pub fn _mm512_mask_unpackhi_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8434    unsafe {
8435        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
8436        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
8437    }
8438}
8439
8440/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8441///
8442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
8443#[inline]
8444#[target_feature(enable = "avx512bw")]
8445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8446#[cfg_attr(test, assert_instr(vpunpckhbw))]
8447pub fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8448    unsafe {
8449        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
8450        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
8451    }
8452}
8453
8454/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8455///
8456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
8457#[inline]
8458#[target_feature(enable = "avx512bw,avx512vl")]
8459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8460#[cfg_attr(test, assert_instr(vpunpckhbw))]
8461pub fn _mm256_mask_unpackhi_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8462    unsafe {
8463        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
8464        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
8465    }
8466}
8467
8468/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8469///
8470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
8471#[inline]
8472#[target_feature(enable = "avx512bw,avx512vl")]
8473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8474#[cfg_attr(test, assert_instr(vpunpckhbw))]
8475pub fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8476    unsafe {
8477        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
8478        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
8479    }
8480}
8481
8482/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8483///
8484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
8485#[inline]
8486#[target_feature(enable = "avx512bw,avx512vl")]
8487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8488#[cfg_attr(test, assert_instr(vpunpckhbw))]
8489pub fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8490    unsafe {
8491        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
8492        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
8493    }
8494}
8495
8496/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8497///
8498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
8499#[inline]
8500#[target_feature(enable = "avx512bw,avx512vl")]
8501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8502#[cfg_attr(test, assert_instr(vpunpckhbw))]
8503pub fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8504    unsafe {
8505        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
8506        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
8507    }
8508}
8509
/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // simd_shuffle! concatenates the inputs: indices 0..=31 select from `a`,
        // 32..=63 (written `32+i`) select from `b`. Each pair of rows below
        // covers one 128-bit lane (8 words per lane): the lower four words of
        // the lane from `a` are interleaved with the matching words from `b`,
        // reproducing VPUNPCKLWD's lane-wise low-half interleave.
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
               0,  32+0,   1, 32+1,
               2,  32+2,   3, 32+3,
               8,  32+8,   9, 32+9,
               10, 32+10, 11, 32+11,
               16, 32+16, 17, 32+17,
               18, 32+18, 19, 32+19,
               24, 32+24, 25, 32+25,
               26, 32+26, 27, 32+27
            ],
        );
        transmute(r)
    }
}
8539
8540/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8541///
8542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
8543#[inline]
8544#[target_feature(enable = "avx512bw")]
8545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8546#[cfg_attr(test, assert_instr(vpunpcklwd))]
8547pub fn _mm512_mask_unpacklo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8548    unsafe {
8549        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
8550        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
8551    }
8552}
8553
8554/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8555///
8556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
8557#[inline]
8558#[target_feature(enable = "avx512bw")]
8559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8560#[cfg_attr(test, assert_instr(vpunpcklwd))]
8561pub fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8562    unsafe {
8563        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
8564        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
8565    }
8566}
8567
8568/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8569///
8570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
8571#[inline]
8572#[target_feature(enable = "avx512bw,avx512vl")]
8573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8574#[cfg_attr(test, assert_instr(vpunpcklwd))]
8575pub fn _mm256_mask_unpacklo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8576    unsafe {
8577        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
8578        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
8579    }
8580}
8581
8582/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8583///
8584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
8585#[inline]
8586#[target_feature(enable = "avx512bw,avx512vl")]
8587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8588#[cfg_attr(test, assert_instr(vpunpcklwd))]
8589pub fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8590    unsafe {
8591        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
8592        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
8593    }
8594}
8595
8596/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8597///
8598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
8599#[inline]
8600#[target_feature(enable = "avx512bw,avx512vl")]
8601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8602#[cfg_attr(test, assert_instr(vpunpcklwd))]
8603pub fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8604    unsafe {
8605        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
8606        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
8607    }
8608}
8609
8610/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8611///
8612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
8613#[inline]
8614#[target_feature(enable = "avx512bw,avx512vl")]
8615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8616#[cfg_attr(test, assert_instr(vpunpcklwd))]
8617pub fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8618    unsafe {
8619        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
8620        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
8621    }
8622}
8623
/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // simd_shuffle! concatenates the inputs: indices 0..=63 select from `a`,
        // 64..=127 (written `64+i`) select from `b`. Each group of four rows
        // below covers one 128-bit lane (16 bytes per lane): the lower eight
        // bytes of the lane from `a` are interleaved with the matching bytes
        // from `b`, reproducing VPUNPCKLBW's lane-wise low-half interleave.
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                0,  64+0,   1, 64+1,
                2,  64+2,   3, 64+3,
                4,  64+4,   5, 64+5,
                6,  64+6,   7, 64+7,
                16, 64+16, 17, 64+17,
                18, 64+18, 19, 64+19,
                20, 64+20, 21, 64+21,
                22, 64+22, 23, 64+23,
                32, 64+32, 33, 64+33,
                34, 64+34, 35, 64+35,
                36, 64+36, 37, 64+37,
                38, 64+38, 39, 64+39,
                48, 64+48, 49, 64+49,
                50, 64+50, 51, 64+51,
                52, 64+52, 53, 64+53,
                54, 64+54, 55, 64+55,
            ],
        );
        transmute(r)
    }
}
8661
8662/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8663///
8664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
8665#[inline]
8666#[target_feature(enable = "avx512bw")]
8667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8668#[cfg_attr(test, assert_instr(vpunpcklbw))]
8669pub fn _mm512_mask_unpacklo_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8670    unsafe {
8671        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
8672        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
8673    }
8674}
8675
8676/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8677///
8678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
8679#[inline]
8680#[target_feature(enable = "avx512bw")]
8681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8682#[cfg_attr(test, assert_instr(vpunpcklbw))]
8683pub fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8684    unsafe {
8685        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
8686        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
8687    }
8688}
8689
8690/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8691///
8692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
8693#[inline]
8694#[target_feature(enable = "avx512bw,avx512vl")]
8695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8696#[cfg_attr(test, assert_instr(vpunpcklbw))]
8697pub fn _mm256_mask_unpacklo_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8698    unsafe {
8699        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
8700        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
8701    }
8702}
8703
8704/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8705///
8706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
8707#[inline]
8708#[target_feature(enable = "avx512bw,avx512vl")]
8709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8710#[cfg_attr(test, assert_instr(vpunpcklbw))]
8711pub fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8712    unsafe {
8713        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
8714        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
8715    }
8716}
8717
8718/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8719///
8720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
8721#[inline]
8722#[target_feature(enable = "avx512bw,avx512vl")]
8723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8724#[cfg_attr(test, assert_instr(vpunpcklbw))]
8725pub fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8726    unsafe {
8727        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
8728        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
8729    }
8730}
8731
8732/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8733///
8734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
8735#[inline]
8736#[target_feature(enable = "avx512bw,avx512vl")]
8737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8738#[cfg_attr(test, assert_instr(vpunpcklbw))]
8739pub fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8740    unsafe {
8741        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
8742        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
8743    }
8744}
8745
8746/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8747///
8748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
8749#[inline]
8750#[target_feature(enable = "avx512bw")]
8751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8752#[cfg_attr(test, assert_instr(vmovdqu16))]
8753pub fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
8754    unsafe {
8755        let mov = a.as_i16x32();
8756        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
8757    }
8758}
8759
8760/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8761///
8762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
8763#[inline]
8764#[target_feature(enable = "avx512bw")]
8765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8766#[cfg_attr(test, assert_instr(vmovdqu16))]
8767pub fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
8768    unsafe {
8769        let mov = a.as_i16x32();
8770        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
8771    }
8772}
8773
8774/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8775///
8776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
8777#[inline]
8778#[target_feature(enable = "avx512bw,avx512vl")]
8779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8780#[cfg_attr(test, assert_instr(vmovdqu16))]
8781pub fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
8782    unsafe {
8783        let mov = a.as_i16x16();
8784        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
8785    }
8786}
8787
8788/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8789///
8790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
8791#[inline]
8792#[target_feature(enable = "avx512bw,avx512vl")]
8793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8794#[cfg_attr(test, assert_instr(vmovdqu16))]
8795pub fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
8796    unsafe {
8797        let mov = a.as_i16x16();
8798        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
8799    }
8800}
8801
8802/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8803///
8804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
8805#[inline]
8806#[target_feature(enable = "avx512bw,avx512vl")]
8807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8808#[cfg_attr(test, assert_instr(vmovdqu16))]
8809pub fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8810    unsafe {
8811        let mov = a.as_i16x8();
8812        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
8813    }
8814}
8815
8816/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8817///
8818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
8819#[inline]
8820#[target_feature(enable = "avx512bw,avx512vl")]
8821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8822#[cfg_attr(test, assert_instr(vmovdqu16))]
8823pub fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
8824    unsafe {
8825        let mov = a.as_i16x8();
8826        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
8827    }
8828}
8829
8830/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8831///
8832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
8833#[inline]
8834#[target_feature(enable = "avx512bw")]
8835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8836#[cfg_attr(test, assert_instr(vmovdqu8))]
8837pub fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
8838    unsafe {
8839        let mov = a.as_i8x64();
8840        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
8841    }
8842}
8843
8844/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8845///
8846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
8847#[inline]
8848#[target_feature(enable = "avx512bw")]
8849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8850#[cfg_attr(test, assert_instr(vmovdqu8))]
8851pub fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
8852    unsafe {
8853        let mov = a.as_i8x64();
8854        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
8855    }
8856}
8857
8858/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8859///
8860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
8861#[inline]
8862#[target_feature(enable = "avx512bw,avx512vl")]
8863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8864#[cfg_attr(test, assert_instr(vmovdqu8))]
8865pub fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
8866    unsafe {
8867        let mov = a.as_i8x32();
8868        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
8869    }
8870}
8871
8872/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8873///
8874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
8875#[inline]
8876#[target_feature(enable = "avx512bw,avx512vl")]
8877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8878#[cfg_attr(test, assert_instr(vmovdqu8))]
8879pub fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
8880    unsafe {
8881        let mov = a.as_i8x32();
8882        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
8883    }
8884}
8885
8886/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8887///
8888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
8889#[inline]
8890#[target_feature(enable = "avx512bw,avx512vl")]
8891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8892#[cfg_attr(test, assert_instr(vmovdqu8))]
8893pub fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
8894    unsafe {
8895        let mov = a.as_i8x16();
8896        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
8897    }
8898}
8899
8900/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8901///
8902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
8903#[inline]
8904#[target_feature(enable = "avx512bw,avx512vl")]
8905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8906#[cfg_attr(test, assert_instr(vmovdqu8))]
8907pub fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
8908    unsafe {
8909        let mov = a.as_i8x16();
8910        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
8911    }
8912}
8913
8914/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8915///
8916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
8917#[inline]
8918#[target_feature(enable = "avx512bw")]
8919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8920#[cfg_attr(test, assert_instr(vpbroadcastw))]
8921pub fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
8922    unsafe {
8923        let r = _mm512_set1_epi16(a).as_i16x32();
8924        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
8925    }
8926}
8927
8928/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8929///
8930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
8931#[inline]
8932#[target_feature(enable = "avx512bw")]
8933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8934#[cfg_attr(test, assert_instr(vpbroadcastw))]
8935pub fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
8936    unsafe {
8937        let r = _mm512_set1_epi16(a).as_i16x32();
8938        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
8939    }
8940}
8941
8942/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
8945#[inline]
8946#[target_feature(enable = "avx512bw,avx512vl")]
8947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8948#[cfg_attr(test, assert_instr(vpbroadcastw))]
8949pub fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
8950    unsafe {
8951        let r = _mm256_set1_epi16(a).as_i16x16();
8952        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
8953    }
8954}
8955
8956/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8957///
8958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
8959#[inline]
8960#[target_feature(enable = "avx512bw,avx512vl")]
8961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8962#[cfg_attr(test, assert_instr(vpbroadcastw))]
8963pub fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
8964    unsafe {
8965        let r = _mm256_set1_epi16(a).as_i16x16();
8966        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
8967    }
8968}
8969
8970/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8971///
8972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
8973#[inline]
8974#[target_feature(enable = "avx512bw,avx512vl")]
8975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8976#[cfg_attr(test, assert_instr(vpbroadcastw))]
8977pub fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
8978    unsafe {
8979        let r = _mm_set1_epi16(a).as_i16x8();
8980        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
8981    }
8982}
8983
8984/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8985///
8986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
8987#[inline]
8988#[target_feature(enable = "avx512bw,avx512vl")]
8989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8990#[cfg_attr(test, assert_instr(vpbroadcastw))]
8991pub fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
8992    unsafe {
8993        let r = _mm_set1_epi16(a).as_i16x8();
8994        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
8995    }
8996}
8997
8998/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8999///
9000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
9001#[inline]
9002#[target_feature(enable = "avx512bw")]
9003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9004#[cfg_attr(test, assert_instr(vpbroadcast))]
9005pub fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
9006    unsafe {
9007        let r = _mm512_set1_epi8(a).as_i8x64();
9008        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
9009    }
9010}
9011
9012/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9013///
9014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
9015#[inline]
9016#[target_feature(enable = "avx512bw")]
9017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9018#[cfg_attr(test, assert_instr(vpbroadcast))]
9019pub fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
9020    unsafe {
9021        let r = _mm512_set1_epi8(a).as_i8x64();
9022        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
9023    }
9024}
9025
9026/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9027///
9028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
9029#[inline]
9030#[target_feature(enable = "avx512bw,avx512vl")]
9031#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9032#[cfg_attr(test, assert_instr(vpbroadcast))]
9033pub fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
9034    unsafe {
9035        let r = _mm256_set1_epi8(a).as_i8x32();
9036        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
9037    }
9038}
9039
9040/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9041///
9042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
9043#[inline]
9044#[target_feature(enable = "avx512bw,avx512vl")]
9045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9046#[cfg_attr(test, assert_instr(vpbroadcast))]
9047pub fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
9048    unsafe {
9049        let r = _mm256_set1_epi8(a).as_i8x32();
9050        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
9051    }
9052}
9053
9054/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9055///
9056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
9057#[inline]
9058#[target_feature(enable = "avx512bw,avx512vl")]
9059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9060#[cfg_attr(test, assert_instr(vpbroadcast))]
9061pub fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
9062    unsafe {
9063        let r = _mm_set1_epi8(a).as_i8x16();
9064        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
9065    }
9066}
9067
9068/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9069///
9070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
9071#[inline]
9072#[target_feature(enable = "avx512bw,avx512vl")]
9073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9074#[cfg_attr(test, assert_instr(vpbroadcast))]
9075pub fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
9076    unsafe {
9077        let r = _mm_set1_epi8(a).as_i8x16();
9078        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
9079    }
9080}
9081
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // Each 128-bit lane holds eight i16 words. Within every lane, the four
        // low words are permuted: the 2-bit fields of IMM8 (bits 1:0, 3:2,
        // 5:4, 7:6) select the source word (0..=3) for result words 0..=3.
        // The four high words of each lane (indices 4..=7) pass through
        // unchanged. The same control is applied to all four lanes; the +8,
        // +16, +24 offsets rebase the lane into the flat 32-element index
        // space that simd_shuffle! operates on.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
                (IMM8 as u32 & 0b11) + 8,
                ((IMM8 as u32 >> 2) & 0b11) + 8,
                ((IMM8 as u32 >> 4) & 0b11) + 8,
                ((IMM8 as u32 >> 6) & 0b11) + 8,
                12,
                13,
                14,
                15,
                (IMM8 as u32 & 0b11) + 16,
                ((IMM8 as u32 >> 2) & 0b11) + 16,
                ((IMM8 as u32 >> 4) & 0b11) + 16,
                ((IMM8 as u32 >> 6) & 0b11) + 16,
                20,
                21,
                22,
                23,
                (IMM8 as u32 & 0b11) + 24,
                ((IMM8 as u32 >> 2) & 0b11) + 24,
                ((IMM8 as u32 >> 4) & 0b11) + 24,
                ((IMM8 as u32 >> 6) & 0b11) + 24,
                28,
                29,
                30,
                31,
            ],
        );
        transmute(r)
    }
}
9135
9136/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9137///
9138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
9139#[inline]
9140#[target_feature(enable = "avx512bw")]
9141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9142#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9143#[rustc_legacy_const_generics(3)]
9144pub fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
9145    src: __m512i,
9146    k: __mmask32,
9147    a: __m512i,
9148) -> __m512i {
9149    unsafe {
9150        static_assert_uimm_bits!(IMM8, 8);
9151        let r = _mm512_shufflelo_epi16::<IMM8>(a);
9152        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
9153    }
9154}
9155
9156/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9157///
9158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
9159#[inline]
9160#[target_feature(enable = "avx512bw")]
9161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9162#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9163#[rustc_legacy_const_generics(2)]
9164pub fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
9165    unsafe {
9166        static_assert_uimm_bits!(IMM8, 8);
9167        let r = _mm512_shufflelo_epi16::<IMM8>(a);
9168        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
9169    }
9170}
9171
9172/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9173///
9174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
9175#[inline]
9176#[target_feature(enable = "avx512bw,avx512vl")]
9177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9178#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9179#[rustc_legacy_const_generics(3)]
9180pub fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
9181    src: __m256i,
9182    k: __mmask16,
9183    a: __m256i,
9184) -> __m256i {
9185    unsafe {
9186        static_assert_uimm_bits!(IMM8, 8);
9187        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
9188        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
9189    }
9190}
9191
9192/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9193///
9194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
9195#[inline]
9196#[target_feature(enable = "avx512bw,avx512vl")]
9197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9198#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9199#[rustc_legacy_const_generics(2)]
9200pub fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
9201    unsafe {
9202        static_assert_uimm_bits!(IMM8, 8);
9203        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
9204        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
9205    }
9206}
9207
9208/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9209///
9210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
9211#[inline]
9212#[target_feature(enable = "avx512bw,avx512vl")]
9213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9214#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9215#[rustc_legacy_const_generics(3)]
9216pub fn _mm_mask_shufflelo_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9217    unsafe {
9218        static_assert_uimm_bits!(IMM8, 8);
9219        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
9220        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
9221    }
9222}
9223
9224/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9225///
9226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
9227#[inline]
9228#[target_feature(enable = "avx512bw,avx512vl")]
9229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9230#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9231#[rustc_legacy_const_generics(2)]
9232pub fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
9233    unsafe {
9234        static_assert_uimm_bits!(IMM8, 8);
9235        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
9236        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
9237    }
9238}
9239
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // Mirror image of _mm512_shufflelo_epi16: within each 128-bit lane
        // the four low words (indices 0..=3 of the lane) pass through
        // unchanged, while the four high words are permuted — the 2-bit
        // fields of IMM8 select a source word (0..=3) relative to the lane's
        // high half, hence the +4 inside each control expression. The +12,
        // +20, +28 groups are the same pattern rebased for lanes 1..=3 in
        // the flat 32-element index space.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
                8,
                9,
                10,
                11,
                (IMM8 as u32 & 0b11) + 12,
                ((IMM8 as u32 >> 2) & 0b11) + 12,
                ((IMM8 as u32 >> 4) & 0b11) + 12,
                ((IMM8 as u32 >> 6) & 0b11) + 12,
                16,
                17,
                18,
                19,
                (IMM8 as u32 & 0b11) + 20,
                ((IMM8 as u32 >> 2) & 0b11) + 20,
                ((IMM8 as u32 >> 4) & 0b11) + 20,
                ((IMM8 as u32 >> 6) & 0b11) + 20,
                24,
                25,
                26,
                27,
                (IMM8 as u32 & 0b11) + 28,
                ((IMM8 as u32 >> 2) & 0b11) + 28,
                ((IMM8 as u32 >> 4) & 0b11) + 28,
                ((IMM8 as u32 >> 6) & 0b11) + 28,
            ],
        );
        transmute(r)
    }
}
9293
9294/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9295///
9296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
9297#[inline]
9298#[target_feature(enable = "avx512bw")]
9299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9300#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
9301#[rustc_legacy_const_generics(3)]
9302pub fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
9303    src: __m512i,
9304    k: __mmask32,
9305    a: __m512i,
9306) -> __m512i {
9307    unsafe {
9308        static_assert_uimm_bits!(IMM8, 8);
9309        let r = _mm512_shufflehi_epi16::<IMM8>(a);
9310        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
9311    }
9312}
9313
9314/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9315///
9316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
9317#[inline]
9318#[target_feature(enable = "avx512bw")]
9319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9320#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
9321#[rustc_legacy_const_generics(2)]
9322pub fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
9323    unsafe {
9324        static_assert_uimm_bits!(IMM8, 8);
9325        let r = _mm512_shufflehi_epi16::<IMM8>(a);
9326        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
9327    }
9328}
9329
9330/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9331///
9332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
9333#[inline]
9334#[target_feature(enable = "avx512bw,avx512vl")]
9335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9336#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
9337#[rustc_legacy_const_generics(3)]
9338pub fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
9339    src: __m256i,
9340    k: __mmask16,
9341    a: __m256i,
9342) -> __m256i {
9343    unsafe {
9344        static_assert_uimm_bits!(IMM8, 8);
9345        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
9346        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
9347    }
9348}
9349
9350/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9351///
9352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
9353#[inline]
9354#[target_feature(enable = "avx512bw,avx512vl")]
9355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9356#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
9357#[rustc_legacy_const_generics(2)]
9358pub fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
9359    unsafe {
9360        static_assert_uimm_bits!(IMM8, 8);
9361        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
9362        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
9363    }
9364}
9365
9366/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9367///
9368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
9369#[inline]
9370#[target_feature(enable = "avx512bw,avx512vl")]
9371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9372#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
9373#[rustc_legacy_const_generics(3)]
9374pub fn _mm_mask_shufflehi_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9375    unsafe {
9376        static_assert_uimm_bits!(IMM8, 8);
9377        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
9378        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
9379    }
9380}
9381
9382/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9383///
9384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
9385#[inline]
9386#[target_feature(enable = "avx512bw,avx512vl")]
9387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9388#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
9389#[rustc_legacy_const_generics(2)]
9390pub fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
9391    unsafe {
9392        static_assert_uimm_bits!(IMM8, 8);
9393        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
9394        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
9395    }
9396}
9397
9398/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
9399///
9400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
9401#[inline]
9402#[target_feature(enable = "avx512bw")]
9403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9404#[cfg_attr(test, assert_instr(vpshufb))]
9405pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
9406    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
9407}
9408
9409/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9410///
9411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
9412#[inline]
9413#[target_feature(enable = "avx512bw")]
9414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9415#[cfg_attr(test, assert_instr(vpshufb))]
9416pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9417    unsafe {
9418        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
9419        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
9420    }
9421}
9422
9423/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9424///
9425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
9426#[inline]
9427#[target_feature(enable = "avx512bw")]
9428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9429#[cfg_attr(test, assert_instr(vpshufb))]
9430pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9431    unsafe {
9432        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
9433        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
9434    }
9435}
9436
9437/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9438///
9439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
9440#[inline]
9441#[target_feature(enable = "avx512bw,avx512vl")]
9442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9443#[cfg_attr(test, assert_instr(vpshufb))]
9444pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9445    unsafe {
9446        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
9447        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
9448    }
9449}
9450
9451/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9452///
9453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
9454#[inline]
9455#[target_feature(enable = "avx512bw,avx512vl")]
9456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9457#[cfg_attr(test, assert_instr(vpshufb))]
9458pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9459    unsafe {
9460        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
9461        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
9462    }
9463}
9464
9465/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9466///
9467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
9468#[inline]
9469#[target_feature(enable = "avx512bw,avx512vl")]
9470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9471#[cfg_attr(test, assert_instr(vpshufb))]
9472pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9473    unsafe {
9474        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
9475        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
9476    }
9477}
9478
9479/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9480///
9481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
9482#[inline]
9483#[target_feature(enable = "avx512bw,avx512vl")]
9484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9485#[cfg_attr(test, assert_instr(vpshufb))]
9486pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9487    unsafe {
9488        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
9489        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
9490    }
9491}
9492
9493/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9494///
9495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
9496#[inline]
9497#[target_feature(enable = "avx512bw")]
9498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9499#[cfg_attr(test, assert_instr(vptestmw))]
9500pub fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
9501    let and = _mm512_and_si512(a, b);
9502    let zero = _mm512_setzero_si512();
9503    _mm512_cmpneq_epi16_mask(and, zero)
9504}
9505
9506/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9507///
9508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
9509#[inline]
9510#[target_feature(enable = "avx512bw")]
9511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9512#[cfg_attr(test, assert_instr(vptestmw))]
9513pub fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
9514    let and = _mm512_and_si512(a, b);
9515    let zero = _mm512_setzero_si512();
9516    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
9517}
9518
9519/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9520///
9521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
9522#[inline]
9523#[target_feature(enable = "avx512bw,avx512vl")]
9524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9525#[cfg_attr(test, assert_instr(vptestmw))]
9526pub fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
9527    let and = _mm256_and_si256(a, b);
9528    let zero = _mm256_setzero_si256();
9529    _mm256_cmpneq_epi16_mask(and, zero)
9530}
9531
9532/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9533///
9534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
9535#[inline]
9536#[target_feature(enable = "avx512bw,avx512vl")]
9537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9538#[cfg_attr(test, assert_instr(vptestmw))]
9539pub fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
9540    let and = _mm256_and_si256(a, b);
9541    let zero = _mm256_setzero_si256();
9542    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
9543}
9544
9545/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9546///
9547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
9548#[inline]
9549#[target_feature(enable = "avx512bw,avx512vl")]
9550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9551#[cfg_attr(test, assert_instr(vptestmw))]
9552pub fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
9553    let and = _mm_and_si128(a, b);
9554    let zero = _mm_setzero_si128();
9555    _mm_cmpneq_epi16_mask(and, zero)
9556}
9557
9558/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9559///
9560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
9561#[inline]
9562#[target_feature(enable = "avx512bw,avx512vl")]
9563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9564#[cfg_attr(test, assert_instr(vptestmw))]
9565pub fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
9566    let and = _mm_and_si128(a, b);
9567    let zero = _mm_setzero_si128();
9568    _mm_mask_cmpneq_epi16_mask(k, and, zero)
9569}
9570
9571/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9572///
9573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
9574#[inline]
9575#[target_feature(enable = "avx512bw")]
9576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9577#[cfg_attr(test, assert_instr(vptestmb))]
9578pub fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
9579    let and = _mm512_and_si512(a, b);
9580    let zero = _mm512_setzero_si512();
9581    _mm512_cmpneq_epi8_mask(and, zero)
9582}
9583
9584/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9585///
9586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
9587#[inline]
9588#[target_feature(enable = "avx512bw")]
9589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9590#[cfg_attr(test, assert_instr(vptestmb))]
9591pub fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
9592    let and = _mm512_and_si512(a, b);
9593    let zero = _mm512_setzero_si512();
9594    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
9595}
9596
9597/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9598///
9599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
9600#[inline]
9601#[target_feature(enable = "avx512bw,avx512vl")]
9602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9603#[cfg_attr(test, assert_instr(vptestmb))]
9604pub fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
9605    let and = _mm256_and_si256(a, b);
9606    let zero = _mm256_setzero_si256();
9607    _mm256_cmpneq_epi8_mask(and, zero)
9608}
9609
9610/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9611///
9612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
9613#[inline]
9614#[target_feature(enable = "avx512bw,avx512vl")]
9615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9616#[cfg_attr(test, assert_instr(vptestmb))]
9617pub fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
9618    let and = _mm256_and_si256(a, b);
9619    let zero = _mm256_setzero_si256();
9620    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
9621}
9622
9623/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9624///
9625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
9626#[inline]
9627#[target_feature(enable = "avx512bw,avx512vl")]
9628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9629#[cfg_attr(test, assert_instr(vptestmb))]
9630pub fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
9631    let and = _mm_and_si128(a, b);
9632    let zero = _mm_setzero_si128();
9633    _mm_cmpneq_epi8_mask(and, zero)
9634}
9635
9636/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9637///
9638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
9639#[inline]
9640#[target_feature(enable = "avx512bw,avx512vl")]
9641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9642#[cfg_attr(test, assert_instr(vptestmb))]
9643pub fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
9644    let and = _mm_and_si128(a, b);
9645    let zero = _mm_setzero_si128();
9646    _mm_mask_cmpneq_epi8_mask(k, and, zero)
9647}
9648
9649/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9650///
9651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
9652#[inline]
9653#[target_feature(enable = "avx512bw")]
9654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9655#[cfg_attr(test, assert_instr(vptestnmw))]
9656pub fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
9657    let and = _mm512_and_si512(a, b);
9658    let zero = _mm512_setzero_si512();
9659    _mm512_cmpeq_epi16_mask(and, zero)
9660}
9661
9662/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9663///
9664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
9665#[inline]
9666#[target_feature(enable = "avx512bw")]
9667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9668#[cfg_attr(test, assert_instr(vptestnmw))]
9669pub fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
9670    let and = _mm512_and_si512(a, b);
9671    let zero = _mm512_setzero_si512();
9672    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
9673}
9674
9675/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9676///
9677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
9678#[inline]
9679#[target_feature(enable = "avx512bw,avx512vl")]
9680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9681#[cfg_attr(test, assert_instr(vptestnmw))]
9682pub fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
9683    let and = _mm256_and_si256(a, b);
9684    let zero = _mm256_setzero_si256();
9685    _mm256_cmpeq_epi16_mask(and, zero)
9686}
9687
9688/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9689///
9690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
9691#[inline]
9692#[target_feature(enable = "avx512bw,avx512vl")]
9693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9694#[cfg_attr(test, assert_instr(vptestnmw))]
9695pub fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
9696    let and = _mm256_and_si256(a, b);
9697    let zero = _mm256_setzero_si256();
9698    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
9699}
9700
9701/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9702///
9703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
9704#[inline]
9705#[target_feature(enable = "avx512bw,avx512vl")]
9706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9707#[cfg_attr(test, assert_instr(vptestnmw))]
9708pub fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
9709    let and = _mm_and_si128(a, b);
9710    let zero = _mm_setzero_si128();
9711    _mm_cmpeq_epi16_mask(and, zero)
9712}
9713
9714/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9715///
9716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
9717#[inline]
9718#[target_feature(enable = "avx512bw,avx512vl")]
9719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9720#[cfg_attr(test, assert_instr(vptestnmw))]
9721pub fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
9722    let and = _mm_and_si128(a, b);
9723    let zero = _mm_setzero_si128();
9724    _mm_mask_cmpeq_epi16_mask(k, and, zero)
9725}
9726
9727/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9728///
9729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
9730#[inline]
9731#[target_feature(enable = "avx512bw")]
9732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9733#[cfg_attr(test, assert_instr(vptestnmb))]
9734pub fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
9735    let and = _mm512_and_si512(a, b);
9736    let zero = _mm512_setzero_si512();
9737    _mm512_cmpeq_epi8_mask(and, zero)
9738}
9739
9740/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9741///
9742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
9743#[inline]
9744#[target_feature(enable = "avx512bw")]
9745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9746#[cfg_attr(test, assert_instr(vptestnmb))]
9747pub fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
9748    let and = _mm512_and_si512(a, b);
9749    let zero = _mm512_setzero_si512();
9750    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
9751}
9752
9753/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9754///
9755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
9756#[inline]
9757#[target_feature(enable = "avx512bw,avx512vl")]
9758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9759#[cfg_attr(test, assert_instr(vptestnmb))]
9760pub fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
9761    let and = _mm256_and_si256(a, b);
9762    let zero = _mm256_setzero_si256();
9763    _mm256_cmpeq_epi8_mask(and, zero)
9764}
9765
9766/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9767///
9768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
9769#[inline]
9770#[target_feature(enable = "avx512bw,avx512vl")]
9771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9772#[cfg_attr(test, assert_instr(vptestnmb))]
9773pub fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
9774    let and = _mm256_and_si256(a, b);
9775    let zero = _mm256_setzero_si256();
9776    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
9777}
9778
9779/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9780///
9781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
9782#[inline]
9783#[target_feature(enable = "avx512bw,avx512vl")]
9784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9785#[cfg_attr(test, assert_instr(vptestnmb))]
9786pub fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
9787    let and = _mm_and_si128(a, b);
9788    let zero = _mm_setzero_si128();
9789    _mm_cmpeq_epi8_mask(and, zero)
9790}
9791
9792/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9793///
9794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
9795#[inline]
9796#[target_feature(enable = "avx512bw,avx512vl")]
9797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9798#[cfg_attr(test, assert_instr(vptestnmb))]
9799pub fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
9800    let and = _mm_and_si128(a, b);
9801    let zero = _mm_setzero_si128();
9802    _mm_mask_cmpeq_epi8_mask(k, and, zero)
9803}
9804
9805/// Store 64-bit mask from a into memory.
9806///
9807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
9808#[inline]
9809#[target_feature(enable = "avx512bw")]
9810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9811#[cfg_attr(test, assert_instr(mov))] //should be kmovq
9812pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
9813    ptr::write(mem_addr as *mut __mmask64, a);
9814}
9815
9816/// Store 32-bit mask from a into memory.
9817///
9818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
9819#[inline]
9820#[target_feature(enable = "avx512bw")]
9821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9822#[cfg_attr(test, assert_instr(mov))] //should be kmovd
9823pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
9824    ptr::write(mem_addr as *mut __mmask32, a);
9825}
9826
9827/// Load 64-bit mask from memory into k.
9828///
9829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
9830#[inline]
9831#[target_feature(enable = "avx512bw")]
9832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9833#[cfg_attr(test, assert_instr(mov))] //should be kmovq
9834pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
9835    ptr::read(mem_addr as *const __mmask64)
9836}
9837
9838/// Load 32-bit mask from memory into k.
9839///
9840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
9841#[inline]
9842#[target_feature(enable = "avx512bw")]
9843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9844#[cfg_attr(test, assert_instr(mov))] //should be kmovd
9845pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
9846    ptr::read(mem_addr as *const __mmask32)
9847}
9848
9849/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
9850///
9851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
9852#[inline]
9853#[target_feature(enable = "avx512bw")]
9854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9855#[cfg_attr(test, assert_instr(vpsadbw))]
9856pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
9857    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
9858}
9859
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9861///
9862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
9863#[inline]
9864#[target_feature(enable = "avx512bw")]
9865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9866#[rustc_legacy_const_generics(2)]
9867#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9868pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
9869    unsafe {
9870        static_assert_uimm_bits!(IMM8, 8);
9871        let a = a.as_u8x64();
9872        let b = b.as_u8x64();
9873        let r = vdbpsadbw(a, b, IMM8);
9874        transmute(r)
9875    }
9876}
9877
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9879///
9880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
9881#[inline]
9882#[target_feature(enable = "avx512bw")]
9883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9884#[rustc_legacy_const_generics(4)]
9885#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9886pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
9887    src: __m512i,
9888    k: __mmask32,
9889    a: __m512i,
9890    b: __m512i,
9891) -> __m512i {
9892    unsafe {
9893        static_assert_uimm_bits!(IMM8, 8);
9894        let a = a.as_u8x64();
9895        let b = b.as_u8x64();
9896        let r = vdbpsadbw(a, b, IMM8);
9897        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
9898    }
9899}
9900
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9902///
9903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
9904#[inline]
9905#[target_feature(enable = "avx512bw")]
9906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9907#[rustc_legacy_const_generics(3)]
9908#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9909pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
9910    unsafe {
9911        static_assert_uimm_bits!(IMM8, 8);
9912        let a = a.as_u8x64();
9913        let b = b.as_u8x64();
9914        let r = vdbpsadbw(a, b, IMM8);
9915        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
9916    }
9917}
9918
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9920///
9921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
9922#[inline]
9923#[target_feature(enable = "avx512bw,avx512vl")]
9924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9925#[rustc_legacy_const_generics(2)]
9926#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9927pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
9928    unsafe {
9929        static_assert_uimm_bits!(IMM8, 8);
9930        let a = a.as_u8x32();
9931        let b = b.as_u8x32();
9932        let r = vdbpsadbw256(a, b, IMM8);
9933        transmute(r)
9934    }
9935}
9936
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9938///
9939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
9940#[inline]
9941#[target_feature(enable = "avx512bw,avx512vl")]
9942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9943#[rustc_legacy_const_generics(4)]
9944#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9945pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
9946    src: __m256i,
9947    k: __mmask16,
9948    a: __m256i,
9949    b: __m256i,
9950) -> __m256i {
9951    unsafe {
9952        static_assert_uimm_bits!(IMM8, 8);
9953        let a = a.as_u8x32();
9954        let b = b.as_u8x32();
9955        let r = vdbpsadbw256(a, b, IMM8);
9956        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
9957    }
9958}
9959
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9961///
9962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
9963#[inline]
9964#[target_feature(enable = "avx512bw,avx512vl")]
9965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9966#[rustc_legacy_const_generics(3)]
9967#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9968pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
9969    unsafe {
9970        static_assert_uimm_bits!(IMM8, 8);
9971        let a = a.as_u8x32();
9972        let b = b.as_u8x32();
9973        let r = vdbpsadbw256(a, b, IMM8);
9974        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
9975    }
9976}
9977
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9979///
9980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
9981#[inline]
9982#[target_feature(enable = "avx512bw,avx512vl")]
9983#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9984#[rustc_legacy_const_generics(2)]
9985#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
9986pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
9987    unsafe {
9988        static_assert_uimm_bits!(IMM8, 8);
9989        let a = a.as_u8x16();
9990        let b = b.as_u8x16();
9991        let r = vdbpsadbw128(a, b, IMM8);
9992        transmute(r)
9993    }
9994}
9995
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
9997///
9998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
9999#[inline]
10000#[target_feature(enable = "avx512bw,avx512vl")]
10001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10002#[rustc_legacy_const_generics(4)]
10003#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10004pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
10005    src: __m128i,
10006    k: __mmask8,
10007    a: __m128i,
10008    b: __m128i,
10009) -> __m128i {
10010    unsafe {
10011        static_assert_uimm_bits!(IMM8, 8);
10012        let a = a.as_u8x16();
10013        let b = b.as_u8x16();
10014        let r = vdbpsadbw128(a, b, IMM8);
10015        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
10016    }
10017}
10018
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // IMM8 is the quadruplet-selection control and must fit in 8 bits.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        // Zeroing merge: lanes with a clear mask bit become 0.
        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
    }
}
10036
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
    // Isolate the sign bit of each 16-bit element, then compare with the
    // filter: the mask bit is set iff the element's MSB was set. This
    // AND+CMPEQ pattern compiles down to a single VPMOVW2M.
    let filter = _mm512_set1_epi16(1 << 15);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi16_mask(a, filter)
}
10049
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
    // Isolate the sign bit of each 16-bit element, then compare with the
    // filter: the mask bit is set iff the element's MSB was set.
    let filter = _mm256_set1_epi16(1 << 15);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi16_mask(a, filter)
}
10062
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
    // Isolate the sign bit of each 16-bit element, then compare with the
    // filter: the mask bit is set iff the element's MSB was set.
    let filter = _mm_set1_epi16(1 << 15);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi16_mask(a, filter)
}
10075
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovb2m))]
pub fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
    // Isolate the sign bit of each byte, then compare with the filter:
    // the mask bit is set iff the element's MSB was set.
    let filter = _mm512_set1_epi8(1 << 7);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi8_mask(a, filter)
}
10088
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
    // Isolate the sign bit of each byte, then compare with the filter:
    // the mask bit is set iff the element's MSB was set.
    let filter = _mm256_set1_epi8(1 << 7);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi8_mask(a, filter)
}
10102
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
    // Isolate the sign bit of each byte, then compare with the filter:
    // the mask bit is set iff the element's MSB was set.
    let filter = _mm_set1_epi8(1 << 7);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi8_mask(a, filter)
}
10116
10117/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10118///
10119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
10120#[inline]
10121#[target_feature(enable = "avx512bw")]
10122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10123#[cfg_attr(test, assert_instr(vpmovm2w))]
10124pub fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
10125    unsafe {
10126        let one = _mm512_set1_epi16(
10127            1 << 15
10128                | 1 << 14
10129                | 1 << 13
10130                | 1 << 12
10131                | 1 << 11
10132                | 1 << 10
10133                | 1 << 9
10134                | 1 << 8
10135                | 1 << 7
10136                | 1 << 6
10137                | 1 << 5
10138                | 1 << 4
10139                | 1 << 3
10140                | 1 << 2
10141                | 1 << 1
10142                | 1 << 0,
10143        )
10144        .as_i16x32();
10145        transmute(simd_select_bitmask(k, one, i16x32::ZERO))
10146    }
10147}
10148
10149/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10150///
10151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
10152#[inline]
10153#[target_feature(enable = "avx512bw,avx512vl")]
10154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10155#[cfg_attr(test, assert_instr(vpmovm2w))]
10156pub fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
10157    unsafe {
10158        let one = _mm256_set1_epi16(
10159            1 << 15
10160                | 1 << 14
10161                | 1 << 13
10162                | 1 << 12
10163                | 1 << 11
10164                | 1 << 10
10165                | 1 << 9
10166                | 1 << 8
10167                | 1 << 7
10168                | 1 << 6
10169                | 1 << 5
10170                | 1 << 4
10171                | 1 << 3
10172                | 1 << 2
10173                | 1 << 1
10174                | 1 << 0,
10175        )
10176        .as_i16x16();
10177        transmute(simd_select_bitmask(k, one, i16x16::ZERO))
10178    }
10179}
10180
10181/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10182///
10183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
10184#[inline]
10185#[target_feature(enable = "avx512bw,avx512vl")]
10186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10187#[cfg_attr(test, assert_instr(vpmovm2w))]
10188pub fn _mm_movm_epi16(k: __mmask8) -> __m128i {
10189    unsafe {
10190        let one = _mm_set1_epi16(
10191            1 << 15
10192                | 1 << 14
10193                | 1 << 13
10194                | 1 << 12
10195                | 1 << 11
10196                | 1 << 10
10197                | 1 << 9
10198                | 1 << 8
10199                | 1 << 7
10200                | 1 << 6
10201                | 1 << 5
10202                | 1 << 4
10203                | 1 << 3
10204                | 1 << 2
10205                | 1 << 1
10206                | 1 << 0,
10207        )
10208        .as_i16x8();
10209        transmute(simd_select_bitmask(k, one, i16x8::ZERO))
10210    }
10211}
10212
10213/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10214///
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
10216#[inline]
10217#[target_feature(enable = "avx512bw")]
10218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10219#[cfg_attr(test, assert_instr(vpmovm2b))]
10220pub fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
10221    unsafe {
10222        let one =
10223            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
10224                .as_i8x64();
10225        transmute(simd_select_bitmask(k, one, i8x64::ZERO))
10226    }
10227}
10228
10229/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10230///
10231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
10232#[inline]
10233#[target_feature(enable = "avx512bw,avx512vl")]
10234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10235#[cfg_attr(test, assert_instr(vpmovm2b))]
10236pub fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
10237    unsafe {
10238        let one =
10239            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
10240                .as_i8x32();
10241        transmute(simd_select_bitmask(k, one, i8x32::ZERO))
10242    }
10243}
10244
10245/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10246///
10247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
10248#[inline]
10249#[target_feature(enable = "avx512bw,avx512vl")]
10250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10251#[cfg_attr(test, assert_instr(vpmovm2b))]
10252pub fn _mm_movm_epi8(k: __mmask16) -> __m128i {
10253    unsafe {
10254        let one =
10255            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
10256                .as_i8x16();
10257        transmute(simd_select_bitmask(k, one, i8x16::ZERO))
10258    }
10259}
10260
10261/// Convert 32-bit mask a into an integer value, and store the result in dst.
10262///
10263/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
10264#[inline]
10265#[target_feature(enable = "avx512bw")]
10266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10267pub fn _cvtmask32_u32(a: __mmask32) -> u32 {
10268    a
10269}
10270
10271/// Convert integer value a into an 32-bit mask, and store the result in k.
10272///
10273/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
10274#[inline]
10275#[target_feature(enable = "avx512bw")]
10276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10277pub fn _cvtu32_mask32(a: u32) -> __mmask32 {
10278    a
10279}
10280
10281/// Add 32-bit masks in a and b, and store the result in k.
10282///
10283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
10284#[inline]
10285#[target_feature(enable = "avx512bw")]
10286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10287pub fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
10288    a + b
10289}
10290
10291/// Add 64-bit masks in a and b, and store the result in k.
10292///
10293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
10294#[inline]
10295#[target_feature(enable = "avx512bw")]
10296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10297pub fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
10298    a + b
10299}
10300
10301/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
10302///
10303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
10304#[inline]
10305#[target_feature(enable = "avx512bw")]
10306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10307pub fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
10308    a & b
10309}
10310
10311/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
10312///
10313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
10314#[inline]
10315#[target_feature(enable = "avx512bw")]
10316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10317pub fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
10318    a & b
10319}
10320
10321/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
10322///
10323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
10324#[inline]
10325#[target_feature(enable = "avx512bw")]
10326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10327pub fn _knot_mask32(a: __mmask32) -> __mmask32 {
10328    !a
10329}
10330
10331/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
10332///
10333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
10334#[inline]
10335#[target_feature(enable = "avx512bw")]
10336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10337pub fn _knot_mask64(a: __mmask64) -> __mmask64 {
10338    !a
10339}
10340
10341/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
10342///
10343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
10344#[inline]
10345#[target_feature(enable = "avx512bw")]
10346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10347pub fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
10348    _knot_mask32(a) & b
10349}
10350
10351/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
10352///
10353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
10354#[inline]
10355#[target_feature(enable = "avx512bw")]
10356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10357pub fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
10358    _knot_mask64(a) & b
10359}
10360
10361/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
10362///
10363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
10364#[inline]
10365#[target_feature(enable = "avx512bw")]
10366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10367pub fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
10368    a | b
10369}
10370
10371/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
10372///
10373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
10374#[inline]
10375#[target_feature(enable = "avx512bw")]
10376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10377pub fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
10378    a | b
10379}
10380
10381/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
10384#[inline]
10385#[target_feature(enable = "avx512bw")]
10386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10387pub fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
10388    a ^ b
10389}
10390
10391/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
10392///
10393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
10394#[inline]
10395#[target_feature(enable = "avx512bw")]
10396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10397pub fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
10398    a ^ b
10399}
10400
10401/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
10402///
10403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
10404#[inline]
10405#[target_feature(enable = "avx512bw")]
10406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10407pub fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
10408    _knot_mask32(a ^ b)
10409}
10410
10411/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
10412///
10413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
10414#[inline]
10415#[target_feature(enable = "avx512bw")]
10416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10417pub fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
10418    _knot_mask64(a ^ b)
10419}
10420
10421/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
10422/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
10423///
10424/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
10425#[inline]
10426#[target_feature(enable = "avx512bw")]
10427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10428pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
10429    let tmp = _kor_mask32(a, b);
10430    *all_ones = (tmp == 0xffffffff) as u8;
10431    (tmp == 0) as u8
10432}
10433
10434/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
10435/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
10436///
10437/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
10438#[inline]
10439#[target_feature(enable = "avx512bw")]
10440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10441pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
10442    let tmp = _kor_mask64(a, b);
10443    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
10444    (tmp == 0) as u8
10445}
10446
10447/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
10448/// store 0 in dst.
10449///
10450/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
10451#[inline]
10452#[target_feature(enable = "avx512bw")]
10453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10454pub fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
10455    (_kor_mask32(a, b) == 0xffffffff) as u8
10456}
10457
10458/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
10459/// store 0 in dst.
10460///
10461/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
10462#[inline]
10463#[target_feature(enable = "avx512bw")]
10464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10465pub fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
10466    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
10467}
10468
10469/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
10470/// store 0 in dst.
10471///
10472/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
10473#[inline]
10474#[target_feature(enable = "avx512bw")]
10475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10476pub fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
10477    (_kor_mask32(a, b) == 0) as u8
10478}
10479
10480/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
10481/// store 0 in dst.
10482///
10483/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
10484#[inline]
10485#[target_feature(enable = "avx512bw")]
10486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10487pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
10488    (_kor_mask64(a, b) == 0) as u8
10489}
10490
10491/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10492///
10493/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
10494#[inline]
10495#[target_feature(enable = "avx512bw")]
10496#[rustc_legacy_const_generics(1)]
10497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10498pub fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
10499    a.unbounded_shl(COUNT)
10500}
10501
10502/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10503///
10504/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
10505#[inline]
10506#[target_feature(enable = "avx512bw")]
10507#[rustc_legacy_const_generics(1)]
10508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10509pub fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
10510    a.unbounded_shl(COUNT)
10511}
10512
10513/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10514///
10515/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
10516#[inline]
10517#[target_feature(enable = "avx512bw")]
10518#[rustc_legacy_const_generics(1)]
10519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10520pub fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
10521    a.unbounded_shr(COUNT)
10522}
10523
10524/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10525///
10526/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
10527#[inline]
10528#[target_feature(enable = "avx512bw")]
10529#[rustc_legacy_const_generics(1)]
10530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10531pub fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
10532    a.unbounded_shr(COUNT)
10533}
10534
10535/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
10536/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
10537/// zeros, store 1 in and_not, otherwise store 0 in and_not.
10538///
10539/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
10540#[inline]
10541#[target_feature(enable = "avx512bw")]
10542#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10543pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
10544    *and_not = (_kandn_mask32(a, b) == 0) as u8;
10545    (_kand_mask32(a, b) == 0) as u8
10546}
10547
10548/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
10549/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
10550/// zeros, store 1 in and_not, otherwise store 0 in and_not.
10551///
10552/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
10553#[inline]
10554#[target_feature(enable = "avx512bw")]
10555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10556pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
10557    *and_not = (_kandn_mask64(a, b) == 0) as u8;
10558    (_kand_mask64(a, b) == 0) as u8
10559}
10560
10561/// Compute the bitwise NOT of 32-bit mask a and then AND with 16-bit mask b, if the result is all
10562/// zeros, store 1 in dst, otherwise store 0 in dst.
10563///
10564/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
10565#[inline]
10566#[target_feature(enable = "avx512bw")]
10567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10568pub fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
10569    (_kandn_mask32(a, b) == 0) as u8
10570}
10571
10572/// Compute the bitwise NOT of 64-bit mask a and then AND with 8-bit mask b, if the result is all
10573/// zeros, store 1 in dst, otherwise store 0 in dst.
10574///
10575/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
10576#[inline]
10577#[target_feature(enable = "avx512bw")]
10578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10579pub fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
10580    (_kandn_mask64(a, b) == 0) as u8
10581}
10582
10583/// Compute the bitwise AND of 32-bit masks a and  b, if the result is all zeros, store 1 in dst, otherwise
10584/// store 0 in dst.
10585///
10586/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
10587#[inline]
10588#[target_feature(enable = "avx512bw")]
10589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10590pub fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
10591    (_kand_mask32(a, b) == 0) as u8
10592}
10593
10594/// Compute the bitwise AND of 64-bit masks a and  b, if the result is all zeros, store 1 in dst, otherwise
10595/// store 0 in dst.
10596///
10597/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
10598#[inline]
10599#[target_feature(enable = "avx512bw")]
10600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10601pub fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
10602    (_kand_mask64(a, b) == 0) as u8
10603}
10604
10605/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
10606///
10607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
10608#[inline]
10609#[target_feature(enable = "avx512bw")]
10610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10611#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckwd
10612pub fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
10613    ((a & 0xffff) << 16) | (b & 0xffff)
10614}
10615
10616/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
10617///
10618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
10619#[inline]
10620#[target_feature(enable = "avx512bw")]
10621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10622#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckdq
10623pub fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
10624    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
10625}
10626
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i16x32();
        // Lane-wise narrowing cast i16 -> i8 (truncation, matching VPMOVWB).
        transmute::<i8x32, _>(simd_cast(a))
    }
}
10640
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        // Merge: keep `src` lanes where the corresponding mask bit is clear.
        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
    }
}
10654
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        // Zeroing merge: lanes with a clear mask bit become 0.
        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
    }
}
10668
10669/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
10670///
10671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
10672#[inline]
10673#[target_feature(enable = "avx512bw,avx512vl")]
10674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10675#[cfg_attr(test, assert_instr(vpmovwb))]
10676pub fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
10677    unsafe {
10678        let a = a.as_i16x16();
10679        transmute::<i8x16, _>(simd_cast(a))
10680    }
10681}
10682
10683/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10684///
10685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
10686#[inline]
10687#[target_feature(enable = "avx512bw,avx512vl")]
10688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10689#[cfg_attr(test, assert_instr(vpmovwb))]
10690pub fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
10691    unsafe {
10692        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
10693        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
10694    }
10695}
10696
10697/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10698///
10699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
10700#[inline]
10701#[target_feature(enable = "avx512bw,avx512vl")]
10702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10703#[cfg_attr(test, assert_instr(vpmovwb))]
10704pub fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
10705    unsafe {
10706        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
10707        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
10708    }
10709}
10710
10711/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
10712///
10713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
10714#[inline]
10715#[target_feature(enable = "avx512bw,avx512vl")]
10716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10717#[cfg_attr(test, assert_instr(vpmovwb))]
10718pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
10719    unsafe {
10720        let a = a.as_i16x8();
10721        let v256: i16x16 = simd_shuffle!(
10722            a,
10723            i16x8::ZERO,
10724            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
10725        );
10726        transmute::<i8x16, _>(simd_cast(v256))
10727    }
10728}
10729
10730/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10731///
10732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
10733#[inline]
10734#[target_feature(enable = "avx512bw,avx512vl")]
10735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10736#[cfg_attr(test, assert_instr(vpmovwb))]
10737pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
10738    unsafe {
10739        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
10740        let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
10741        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
10742    }
10743}
10744
10745/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10746///
10747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
10748#[inline]
10749#[target_feature(enable = "avx512bw,avx512vl")]
10750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10751#[cfg_attr(test, assert_instr(vpmovwb))]
10752pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
10753    unsafe {
10754        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
10755        let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
10756        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
10757    }
10758}
10759
10760/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
10761///
10762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
10763#[inline]
10764#[target_feature(enable = "avx512bw")]
10765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10766#[cfg_attr(test, assert_instr(vpmovswb))]
10767pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
10768    unsafe {
10769        transmute(vpmovswb(
10770            a.as_i16x32(),
10771            i8x32::ZERO,
10772            0b11111111_11111111_11111111_11111111,
10773        ))
10774    }
10775}
10776
10777/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10778///
10779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
10780#[inline]
10781#[target_feature(enable = "avx512bw")]
10782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10783#[cfg_attr(test, assert_instr(vpmovswb))]
10784pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
10785    unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) }
10786}
10787
10788/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10789///
10790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
10791#[inline]
10792#[target_feature(enable = "avx512bw")]
10793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10794#[cfg_attr(test, assert_instr(vpmovswb))]
10795pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
10796    unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) }
10797}
10798
10799/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
10800///
10801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
10802#[inline]
10803#[target_feature(enable = "avx512bw,avx512vl")]
10804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10805#[cfg_attr(test, assert_instr(vpmovswb))]
10806pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
10807    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) }
10808}
10809
10810/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10811///
10812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
10813#[inline]
10814#[target_feature(enable = "avx512bw,avx512vl")]
10815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10816#[cfg_attr(test, assert_instr(vpmovswb))]
10817pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
10818    unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) }
10819}
10820
10821/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10822///
10823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
10824#[inline]
10825#[target_feature(enable = "avx512bw,avx512vl")]
10826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10827#[cfg_attr(test, assert_instr(vpmovswb))]
10828pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
10829    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) }
10830}
10831
10832/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
10833///
10834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
10835#[inline]
10836#[target_feature(enable = "avx512bw,avx512vl")]
10837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10838#[cfg_attr(test, assert_instr(vpmovswb))]
10839pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
10840    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
10841}
10842
10843/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10844///
10845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
10846#[inline]
10847#[target_feature(enable = "avx512bw,avx512vl")]
10848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10849#[cfg_attr(test, assert_instr(vpmovswb))]
10850pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
10851    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
10852}
10853
10854/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10855///
10856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
10857#[inline]
10858#[target_feature(enable = "avx512bw,avx512vl")]
10859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10860#[cfg_attr(test, assert_instr(vpmovswb))]
10861pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
10862    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
10863}
10864
10865/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
10866///
10867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
10868#[inline]
10869#[target_feature(enable = "avx512bw")]
10870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10871#[cfg_attr(test, assert_instr(vpmovuswb))]
10872pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
10873    unsafe {
10874        transmute(vpmovuswb(
10875            a.as_u16x32(),
10876            u8x32::ZERO,
10877            0b11111111_11111111_11111111_11111111,
10878        ))
10879    }
10880}
10881
10882/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10883///
10884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
10885#[inline]
10886#[target_feature(enable = "avx512bw")]
10887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10888#[cfg_attr(test, assert_instr(vpmovuswb))]
10889pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
10890    unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) }
10891}
10892
10893/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10894///
10895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
10896#[inline]
10897#[target_feature(enable = "avx512bw")]
10898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10899#[cfg_attr(test, assert_instr(vpmovuswb))]
10900pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
10901    unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) }
10902}
10903
10904/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
10905///
10906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
10907#[inline]
10908#[target_feature(enable = "avx512bw,avx512vl")]
10909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10910#[cfg_attr(test, assert_instr(vpmovuswb))]
10911pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
10912    unsafe {
10913        transmute(vpmovuswb256(
10914            a.as_u16x16(),
10915            u8x16::ZERO,
10916            0b11111111_11111111,
10917        ))
10918    }
10919}
10920
10921/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10922///
10923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
10924#[inline]
10925#[target_feature(enable = "avx512bw,avx512vl")]
10926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10927#[cfg_attr(test, assert_instr(vpmovuswb))]
10928pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
10929    unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) }
10930}
10931
10932/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10933///
10934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
10935#[inline]
10936#[target_feature(enable = "avx512bw,avx512vl")]
10937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10938#[cfg_attr(test, assert_instr(vpmovuswb))]
10939pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
10940    unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) }
10941}
10942
10943/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
10944///
10945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
10946#[inline]
10947#[target_feature(enable = "avx512bw,avx512vl")]
10948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10949#[cfg_attr(test, assert_instr(vpmovuswb))]
10950pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
10951    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
10952}
10953
10954/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10955///
10956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
10957#[inline]
10958#[target_feature(enable = "avx512bw,avx512vl")]
10959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10960#[cfg_attr(test, assert_instr(vpmovuswb))]
10961pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
10962    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
10963}
10964
10965/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10966///
10967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
10968#[inline]
10969#[target_feature(enable = "avx512bw,avx512vl")]
10970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10971#[cfg_attr(test, assert_instr(vpmovuswb))]
10972pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
10973    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
10974}
10975
10976/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
10977///
10978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
10979#[inline]
10980#[target_feature(enable = "avx512bw")]
10981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10982#[cfg_attr(test, assert_instr(vpmovsxbw))]
10983pub fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
10984    unsafe {
10985        let a = a.as_i8x32();
10986        transmute::<i16x32, _>(simd_cast(a))
10987    }
10988}
10989
10990/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10991///
10992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
10993#[inline]
10994#[target_feature(enable = "avx512bw")]
10995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10996#[cfg_attr(test, assert_instr(vpmovsxbw))]
10997pub fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
10998    unsafe {
10999        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11000        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11001    }
11002}
11003
11004/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11005///
11006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
11007#[inline]
11008#[target_feature(enable = "avx512bw")]
11009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11010#[cfg_attr(test, assert_instr(vpmovsxbw))]
11011pub fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11012    unsafe {
11013        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11014        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11015    }
11016}
11017
11018/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11019///
11020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
11021#[inline]
11022#[target_feature(enable = "avx512bw,avx512vl")]
11023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11024#[cfg_attr(test, assert_instr(vpmovsxbw))]
11025pub fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11026    unsafe {
11027        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11028        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11029    }
11030}
11031
11032/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11033///
11034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
11035#[inline]
11036#[target_feature(enable = "avx512bw,avx512vl")]
11037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11038#[cfg_attr(test, assert_instr(vpmovsxbw))]
11039pub fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11040    unsafe {
11041        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11042        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11043    }
11044}
11045
11046/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11047///
11048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
11049#[inline]
11050#[target_feature(enable = "avx512bw,avx512vl")]
11051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11052#[cfg_attr(test, assert_instr(vpmovsxbw))]
11053pub fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11054    unsafe {
11055        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11056        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
11057    }
11058}
11059
11060/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11061///
11062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
11063#[inline]
11064#[target_feature(enable = "avx512bw,avx512vl")]
11065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11066#[cfg_attr(test, assert_instr(vpmovsxbw))]
11067pub fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
11068    unsafe {
11069        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11070        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
11071    }
11072}
11073
11074/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
11075///
11076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
11077#[inline]
11078#[target_feature(enable = "avx512bw")]
11079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11080#[cfg_attr(test, assert_instr(vpmovzxbw))]
11081pub fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
11082    unsafe {
11083        let a = a.as_u8x32();
11084        transmute::<i16x32, _>(simd_cast(a))
11085    }
11086}
11087
11088/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11089///
11090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
11091#[inline]
11092#[target_feature(enable = "avx512bw")]
11093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11094#[cfg_attr(test, assert_instr(vpmovzxbw))]
11095pub fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
11096    unsafe {
11097        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11098        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11099    }
11100}
11101
11102/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11103///
11104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
11105#[inline]
11106#[target_feature(enable = "avx512bw")]
11107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11108#[cfg_attr(test, assert_instr(vpmovzxbw))]
11109pub fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11110    unsafe {
11111        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11112        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11113    }
11114}
11115
11116/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11117///
11118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
11119#[inline]
11120#[target_feature(enable = "avx512bw,avx512vl")]
11121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11122#[cfg_attr(test, assert_instr(vpmovzxbw))]
11123pub fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11124    unsafe {
11125        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
11126        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11127    }
11128}
11129
11130/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11131///
11132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
11133#[inline]
11134#[target_feature(enable = "avx512bw,avx512vl")]
11135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11136#[cfg_attr(test, assert_instr(vpmovzxbw))]
11137pub fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11138    unsafe {
11139        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
11140        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11141    }
11142}
11143
11144/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
11147#[inline]
11148#[target_feature(enable = "avx512bw,avx512vl")]
11149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11150#[cfg_attr(test, assert_instr(vpmovzxbw))]
11151pub fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11152    unsafe {
11153        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
11154        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
11155    }
11156}
11157
11158/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11159///
11160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
11161#[inline]
11162#[target_feature(enable = "avx512bw,avx512vl")]
11163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11164#[cfg_attr(test, assert_instr(vpmovzxbw))]
11165pub fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
11166    unsafe {
11167        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
11168        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
11169    }
11170}
11171
/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Compile-time shuffle-index generator for output byte `i`. The shuffle
        // below concatenates `zero` (indices 0..64) and `a` (indices 64..128),
        // so returning 0 selects a zero byte and returning `64 + n` selects
        // byte `n` of `a`.
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            // A byte is shifted-in zero when the whole 16-byte lane is shifted
            // out (shift > 15) or its in-lane position `i % 16` is below the
            // shift amount; otherwise it comes from `a[i - shift]`, which stays
            // within the same 128-bit lane because `i % 16 >= shift` here.
            if shift > 15 || i % 16 < shift {
                0
            } else {
                64 + (i - shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        // `simd_shuffle!` requires a const index array, hence the fully
        // unrolled 64-element list of `mask(IMM8, i)` evaluations.
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
11266
11267/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
11268///
11269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shuffle-index generator for destination byte `i` (0..64).
        // In `simd_shuffle!(zero, a, ...)` indices 0..=63 select from the
        // first operand (`zero`) and 64..=127 select from the second (`a`),
        // so returning 0 produces a zero byte and `64 + (i + shift)` produces
        // byte `i + shift` of `a` — a right shift by `shift` bytes within the
        // 16-byte lane containing `i`.
        const fn mask(shift: i32, i: u32) -> u32 {
            // The instruction takes an 8-bit immediate; mirror that truncation.
            let shift = shift as u32 & 0xff;
            // A shift of 16 or more clears the whole lane; otherwise the bytes
            // whose source lies past the end of the lane are zero-filled.
            if shift > 15 || (15 - (i % 16)) < shift {
                0
            } else {
                64 + (i + shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        // The index array must be a const expression, hence the 64 explicit
        // `mask` invocations rather than a loop.
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
11361
11362/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
11363/// Unlike [`_mm_alignr_epi8`], [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated to the temporary result,
11364/// this concatenation happens in 4 steps, where each step builds 32-byte temporary result.
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    // Shuffle-index generator for destination byte `i` in
    // `simd_shuffle!(b, a, ...)`: indices 0..=63 select from `b` and
    // 64..=127 from `a`. Within each 16-byte lane, destination byte `i` is
    // byte `i + shift` of the concatenation `a:b`; once that crosses out of
    // `b`'s lane, `i + 48 + shift` == `64 + (i - 16 + shift)` picks the
    // corresponding byte from the same lane of `a`.
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 48 + shift
        }
    }

    // NOTE(review): unlike the mask/maskz wrappers there is no
    // `static_assert_uimm_bits!(IMM8, 8)` here; a negative IMM8 would fall
    // through to the `as u32` cast below and index incorrectly — assumed
    // callers pass 0..=255, TODO confirm.
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm512_setzero_si512();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm512_setzero_si512(), a)
    } else {
        (a, b)
    };
    unsafe {
        // Shifting by exactly one lane yields `a` unchanged.
        if IMM8 == 16 {
            return transmute(a);
        }

        // The index array must be a const expression, hence the 64 explicit
        // `mask` invocations rather than a loop.
        let r: i8x64 = simd_shuffle!(
            b.as_i8x64(),
            a.as_i8x64(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
                mask(IMM8 as u32, 32),
                mask(IMM8 as u32, 33),
                mask(IMM8 as u32, 34),
                mask(IMM8 as u32, 35),
                mask(IMM8 as u32, 36),
                mask(IMM8 as u32, 37),
                mask(IMM8 as u32, 38),
                mask(IMM8 as u32, 39),
                mask(IMM8 as u32, 40),
                mask(IMM8 as u32, 41),
                mask(IMM8 as u32, 42),
                mask(IMM8 as u32, 43),
                mask(IMM8 as u32, 44),
                mask(IMM8 as u32, 45),
                mask(IMM8 as u32, 46),
                mask(IMM8 as u32, 47),
                mask(IMM8 as u32, 48),
                mask(IMM8 as u32, 49),
                mask(IMM8 as u32, 50),
                mask(IMM8 as u32, 51),
                mask(IMM8 as u32, 52),
                mask(IMM8 as u32, 53),
                mask(IMM8 as u32, 54),
                mask(IMM8 as u32, 55),
                mask(IMM8 as u32, 56),
                mask(IMM8 as u32, 57),
                mask(IMM8 as u32, 58),
                mask(IMM8 as u32, 59),
                mask(IMM8 as u32, 60),
                mask(IMM8 as u32, 61),
                mask(IMM8 as u32, 62),
                mask(IMM8 as u32, 63),
            ],
        );
        transmute(r)
    }
}
11473
11474/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11475///
11476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
11477#[inline]
11478#[target_feature(enable = "avx512bw")]
11479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11480#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
11481#[rustc_legacy_const_generics(4)]
11482pub fn _mm512_mask_alignr_epi8<const IMM8: i32>(
11483    src: __m512i,
11484    k: __mmask64,
11485    a: __m512i,
11486    b: __m512i,
11487) -> __m512i {
11488    unsafe {
11489        static_assert_uimm_bits!(IMM8, 8);
11490        let r = _mm512_alignr_epi8::<IMM8>(a, b);
11491        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
11492    }
11493}
11494
11495/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11496///
11497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
11498#[inline]
11499#[target_feature(enable = "avx512bw")]
11500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11501#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
11502#[rustc_legacy_const_generics(3)]
11503pub fn _mm512_maskz_alignr_epi8<const IMM8: i32>(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
11504    unsafe {
11505        static_assert_uimm_bits!(IMM8, 8);
11506        let r = _mm512_alignr_epi8::<IMM8>(a, b);
11507        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
11508    }
11509}
11510
11511/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11512///
11513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
11514#[inline]
11515#[target_feature(enable = "avx512bw,avx512vl")]
11516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11517#[rustc_legacy_const_generics(4)]
11518#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
11519pub fn _mm256_mask_alignr_epi8<const IMM8: i32>(
11520    src: __m256i,
11521    k: __mmask32,
11522    a: __m256i,
11523    b: __m256i,
11524) -> __m256i {
11525    unsafe {
11526        static_assert_uimm_bits!(IMM8, 8);
11527        let r = _mm256_alignr_epi8::<IMM8>(a, b);
11528        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
11529    }
11530}
11531
11532/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11533///
11534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
11535#[inline]
11536#[target_feature(enable = "avx512bw,avx512vl")]
11537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11538#[rustc_legacy_const_generics(3)]
11539#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
11540pub fn _mm256_maskz_alignr_epi8<const IMM8: i32>(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
11541    unsafe {
11542        static_assert_uimm_bits!(IMM8, 8);
11543        let r = _mm256_alignr_epi8::<IMM8>(a, b);
11544        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
11545    }
11546}
11547
11548/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11549///
11550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
11551#[inline]
11552#[target_feature(enable = "avx512bw,avx512vl")]
11553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11554#[rustc_legacy_const_generics(4)]
11555#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
11556pub fn _mm_mask_alignr_epi8<const IMM8: i32>(
11557    src: __m128i,
11558    k: __mmask16,
11559    a: __m128i,
11560    b: __m128i,
11561) -> __m128i {
11562    unsafe {
11563        static_assert_uimm_bits!(IMM8, 8);
11564        let r = _mm_alignr_epi8::<IMM8>(a, b);
11565        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
11566    }
11567}
11568
11569/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11570///
11571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
11572#[inline]
11573#[target_feature(enable = "avx512bw,avx512vl")]
11574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11575#[rustc_legacy_const_generics(3)]
11576#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
11577pub fn _mm_maskz_alignr_epi8<const IMM8: i32>(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
11578    unsafe {
11579        static_assert_uimm_bits!(IMM8, 8);
11580        let r = _mm_alignr_epi8::<IMM8>(a, b);
11581        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
11582    }
11583}
11584
11585/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11586///
11587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    // Thin wrapper over the LLVM masked signed-saturating truncating store:
    // only the bytes whose bit is set in `k` are written to `mem_addr`
    // (which may be unaligned).
    vpmovswbmem(mem_addr, a.as_i16x32(), k);
}
11595
11596/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    // 256-bit form of the masked signed-saturating truncating store; only
    // bytes with a set mask bit are written to (possibly unaligned) `mem_addr`.
    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
}
11606
11607/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11608///
11609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    // 128-bit form of the masked signed-saturating truncating store; only
    // bytes with a set mask bit are written to (possibly unaligned) `mem_addr`.
    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
}
11617
11618/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11619///
11620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    // Masked truncating (no saturation) 16->8-bit store; only bytes with a
    // set bit in `k` are written to (possibly unaligned) `mem_addr`.
    vpmovwbmem(mem_addr, a.as_i16x32(), k);
}
11628
11629/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11630///
11631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    // 256-bit form of the masked truncating 16->8-bit store.
    vpmovwbmem256(mem_addr, a.as_i16x16(), k);
}
11639
11640/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11641///
11642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    // 128-bit form of the masked truncating 16->8-bit store.
    vpmovwbmem128(mem_addr, a.as_i16x8(), k);
}
11650
11651/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11652///
11653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    // Masked unsigned-saturating 16->8-bit store; only bytes with a set bit
    // in `k` are written to (possibly unaligned) `mem_addr`.
    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
}
11661
11662/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11663///
11664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    // 256-bit form of the masked unsigned-saturating 16->8-bit store.
    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
}
11672
11673/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11674///
11675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    // 128-bit form of the masked unsigned-saturating 16->8-bit store.
    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
}
11683
// Raw LLVM intrinsic declarations backing the intrinsics above. The
// `link_name` strings name the LLVM x86 builtins; grouping comments below are
// keyed off those names.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // PMULHRSW: 16-bit multiply-high with round/scale.
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    // PMADDWD / PMADDUBSW: multiply and horizontally add adjacent pairs.
    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;

    // PACK*: narrowing packs with signed (ss) / unsigned (us) saturation.
    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    // PSLLW / PSRLW / PSRAW: 16-bit shifts by a count held in an xmm vector.
    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    // VPERMI2W: two-source 16-bit permute across all vector widths.
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    // VPERMW: single-source 16-bit permute across all vector widths.
    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    // PSHUFB: byte shuffle within 16-byte lanes.
    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    // PSADBW / DBPSADBW: (double-block) sums of absolute byte differences.
    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    // VPMOVSWB / VPMOVUSWB: masked saturating 16->8-bit narrowing conversions
    // returning a vector (register destination).
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
    fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
    fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
    fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    // Masked 16->8-bit narrowing conversions storing directly to memory:
    // signed-saturating (pmovs), truncating (pmov), unsigned-saturating (pmovus).
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
}
11774
11775#[cfg(test)]
11776mod tests {
11777
11778    use stdarch_test::simd_test;
11779
11780    use crate::core_arch::x86::*;
11781    use crate::hint::black_box;
11782    use crate::mem::{self};
11783
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi16() {
        // abs(-1) == 1 in every 16-bit lane.
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_abs_epi16(a);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }
11791
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        // An all-zero writemask copies every lane from src (here `a`).
        let r = _mm512_mask_abs_epi16(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
        // `_mm512_set_epi16` lists lanes high-to-low, so the set low mask bits
        // map to the trailing `1` arguments of each 16-lane group.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
11803
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        // An all-zero zeromask clears every lane.
        let r = _mm512_maskz_abs_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
11815
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        // Zero writemask copies src; low 8 mask bits select abs() in lanes 0..=7.
        let r = _mm256_mask_abs_epi16(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
11825
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        // Zero zeromask clears all lanes; low 8 bits keep abs() in lanes 0..=7.
        let r = _mm256_maskz_abs_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
11835
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        // Zero writemask copies src; low 4 mask bits select abs() in lanes 0..=3.
        let r = _mm_mask_abs_epi16(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
11845
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        // Zero zeromask clears all lanes; low 4 bits keep abs() in lanes 0..=3.
        let r = _mm_maskz_abs_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
11855
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi8() {
        // abs(-1) == 1 in every 8-bit lane.
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_abs_epi8(a);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }
11863
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        // An all-zero writemask copies every lane from src (here `a`).
        let r = _mm512_mask_abs_epi8(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        // Lanes are listed high-to-low; each set-low-byte of the mask maps to
        // the trailing `1` arguments of each 16-lane group.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
11881
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        // An all-zero zeromask clears every lane.
        let r = _mm512_maskz_abs_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
11898
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        // Zero writemask copies src; set mask bytes keep abs() in the low lanes.
        let r = _mm256_mask_abs_epi8(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
11910
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        // Zero zeromask clears all lanes; set mask bits keep abs() results.
        let r = _mm256_maskz_abs_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
11922
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        // Zero writemask copies src; low 8 mask bits select abs() in lanes 0..=7.
        let r = _mm_mask_abs_epi8(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
11932
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        // Zero zeromask clears all lanes; low 8 bits keep abs() in lanes 0..=7.
        let r = _mm_maskz_abs_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
11943
11944    #[simd_test(enable = "avx512bw")]
11945    unsafe fn test_mm512_add_epi16() {
11946        let a = _mm512_set1_epi16(1);
11947        let b = _mm512_set1_epi16(2);
11948        let r = _mm512_add_epi16(a, b);
11949        let e = _mm512_set1_epi16(3);
11950        assert_eq_m512i(r, e);
11951    }
11952
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        // Zero mask: every lane is copied from `src` (`a`).
        let r = _mm512_mask_add_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Mask bit 0 is the lowest lane (LAST set_epi16 argument): set bits get 1+2 == 3.
        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
11965
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        // Zero mask: all lanes zeroed.
        let r = _mm512_maskz_add_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set bits keep the sum (1+2 == 3); cleared bits zero the lane (bit 0 = lowest lane).
        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
11978
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm256_mask_add_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 8 mask bits set: the 8 lowest lanes (rightmost args) get 1+2 == 3.
        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
11989
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low 8 mask bits keep the sum; the upper 8 lanes are zeroed.
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
12000
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm_mask_add_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits set: the 4 lowest lanes (rightmost args) get 1+2 == 3.
        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12011
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_add_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 mask bits keep the sum; the upper 4 lanes are zeroed.
        let r = _mm_maskz_add_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12022
12023    #[simd_test(enable = "avx512bw")]
12024    unsafe fn test_mm512_add_epi8() {
12025        let a = _mm512_set1_epi8(1);
12026        let b = _mm512_set1_epi8(2);
12027        let r = _mm512_add_epi8(a, b);
12028        let e = _mm512_set1_epi8(3);
12029        assert_eq_m512i(r, e);
12030    }
12031
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        // Zero mask: every lane is copied from `src` (`a`).
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // 64-bit mask, bit 0 = lowest lane (LAST set_epi8 argument): set bits get 1+2 == 3.
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
12051
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        // Zero mask: all lanes zeroed.
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set bits keep the sum (1+2 == 3); cleared bits zero the lane (bit 0 = lowest lane).
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
12070
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Mask bit 0 is the lowest lane (LAST set_epi8 argument): set bits get 1+2 == 3.
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
12083
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Set bits keep the sum; cleared bits zero the lane (bit 0 = lowest lane).
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
12096
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm_mask_add_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 8 mask bits set: the 8 lowest lanes (rightmost args) get 1+2 == 3.
        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12107
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_add_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 8 mask bits keep the sum; the upper 8 lanes are zeroed.
        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12118
12119    #[simd_test(enable = "avx512bw")]
12120    unsafe fn test_mm512_adds_epu16() {
12121        let a = _mm512_set1_epi16(1);
12122        let b = _mm512_set1_epi16(u16::MAX as i16);
12123        let r = _mm512_adds_epu16(a, b);
12124        let e = _mm512_set1_epi16(u16::MAX as i16);
12125        assert_eq_m512i(r, e);
12126    }
12127
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        // Zero mask: every lane is copied from `src` (`a`).
        let r = _mm512_mask_adds_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: those lanes saturate at u16::MAX (bit 0 = lowest lane, LAST arg).
        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }
12140
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        // Zero mask: all lanes zeroed.
        let r = _mm512_maskz_adds_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 4 mask bits keep the saturated result (u16::MAX); the rest are zeroed.
        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }
12153
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm256_mask_adds_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits set: those lanes saturate at u16::MAX (bit 0 = lowest lane).
        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }
12165
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_adds_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low 4 mask bits keep the saturated result (u16::MAX); the rest are zeroed.
        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }
12177
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm_mask_adds_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits set: those lanes saturate at u16::MAX (bit 0 = lowest lane).
        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }
12189
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_adds_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 mask bits keep the saturated result (u16::MAX); the rest are zeroed.
        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }
12201
12202    #[simd_test(enable = "avx512bw")]
12203    unsafe fn test_mm512_adds_epu8() {
12204        let a = _mm512_set1_epi8(1);
12205        let b = _mm512_set1_epi8(u8::MAX as i8);
12206        let r = _mm512_adds_epu8(a, b);
12207        let e = _mm512_set1_epi8(u8::MAX as i8);
12208        assert_eq_m512i(r, e);
12209    }
12210
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        // Zero mask: every lane is copied from `src` (`a`).
        let r = _mm512_mask_adds_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // 64-bit mask, low 4 bits set: those lanes saturate at u8::MAX (bit 0 = lowest lane).
        let r = _mm512_mask_adds_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }
12230
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        // Zero mask: all lanes zeroed.
        let r = _mm512_maskz_adds_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 4 mask bits keep the saturated result (u8::MAX); the rest are zeroed.
        let r = _mm512_maskz_adds_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }
12249
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm256_mask_adds_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits set: those lanes saturate at u8::MAX (bit 0 = lowest lane).
        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }
12262
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_adds_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low 4 mask bits keep the saturated result (u8::MAX); the rest are zeroed.
        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }
12275
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm_mask_adds_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits set: those lanes saturate at u8::MAX (bit 0 = lowest lane).
        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
12287
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_adds_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 mask bits keep the saturated result (u8::MAX); the rest are zeroed.
        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
12299
12300    #[simd_test(enable = "avx512bw")]
12301    unsafe fn test_mm512_adds_epi16() {
12302        let a = _mm512_set1_epi16(1);
12303        let b = _mm512_set1_epi16(i16::MAX);
12304        let r = _mm512_adds_epi16(a, b);
12305        let e = _mm512_set1_epi16(i16::MAX);
12306        assert_eq_m512i(r, e);
12307    }
12308
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        // Zero mask: every lane is copied from `src` (`a`).
        let r = _mm512_mask_adds_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: those lanes saturate at i16::MAX (bit 0 = lowest lane, LAST arg).
        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }
12321
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        // Zero mask: all lanes zeroed.
        let r = _mm512_maskz_adds_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 4 mask bits keep the saturated result (i16::MAX); the rest are zeroed.
        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }
12334
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm256_mask_adds_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits set: those lanes saturate at i16::MAX (bit 0 = lowest lane).
        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }
12346
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_adds_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low 4 mask bits keep the saturated result (i16::MAX); the rest are zeroed.
        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }
12358
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm_mask_adds_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits set: those lanes saturate at i16::MAX (bit 0 = lowest lane).
        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
12369
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_adds_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 mask bits keep the saturated result (i16::MAX); the rest are zeroed.
        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
12380
12381    #[simd_test(enable = "avx512bw")]
12382    unsafe fn test_mm512_adds_epi8() {
12383        let a = _mm512_set1_epi8(1);
12384        let b = _mm512_set1_epi8(i8::MAX);
12385        let r = _mm512_adds_epi8(a, b);
12386        let e = _mm512_set1_epi8(i8::MAX);
12387        assert_eq_m512i(r, e);
12388    }
12389
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        // Zero mask: every lane is copied from `src` (`a`).
        let r = _mm512_mask_adds_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // 64-bit mask, low 4 bits set: those lanes saturate at i8::MAX (bit 0 = lowest lane).
        let r = _mm512_mask_adds_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
12409
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        // Zero mask: all lanes zeroed.
        let r = _mm512_maskz_adds_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 4 mask bits keep the saturated result (i8::MAX); the rest are zeroed.
        let r = _mm512_maskz_adds_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
12428
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm256_mask_adds_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits set: those lanes saturate at i8::MAX (bit 0 = lowest lane).
        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
12441
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_adds_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low 4 mask bits keep the saturated result (i8::MAX); the rest are zeroed.
        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
12454
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm_mask_adds_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits set: those lanes saturate at i8::MAX (bit 0 = lowest lane).
        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
12466
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_adds_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 mask bits keep the saturated result (i8::MAX); the rest are zeroed.
        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
12478
12479    #[simd_test(enable = "avx512bw")]
12480    unsafe fn test_mm512_sub_epi16() {
12481        let a = _mm512_set1_epi16(1);
12482        let b = _mm512_set1_epi16(2);
12483        let r = _mm512_sub_epi16(a, b);
12484        let e = _mm512_set1_epi16(-1);
12485        assert_eq_m512i(r, e);
12486    }
12487
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        // Zero mask: every lane is copied from `src` (`a`).
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Mask bit 0 is the lowest lane (LAST set_epi16 argument): set bits get 1-2 == -1.
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }
12500
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        // Zero mask: all lanes zeroed.
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set bits keep the difference (1-2 == -1); cleared bits zero the lane.
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }
12513
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 8 mask bits set: the 8 lowest lanes (rightmost args) get 1-2 == -1.
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }
12524
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low 8 mask bits keep the difference; the upper 8 lanes are zeroed.
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }
12535
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits set: the 4 lowest lanes (rightmost args) get 1-2 == -1.
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
12546
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 mask bits keep the difference; the upper 4 lanes are zeroed.
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
12557
12558    #[simd_test(enable = "avx512bw")]
12559    unsafe fn test_mm512_sub_epi8() {
12560        let a = _mm512_set1_epi8(1);
12561        let b = _mm512_set1_epi8(2);
12562        let r = _mm512_sub_epi8(a, b);
12563        let e = _mm512_set1_epi8(-1);
12564        assert_eq_m512i(r, e);
12565    }
12566
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        // Zero mask: every lane is copied from `src` (`a`).
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // 64-bit mask, bit 0 = lowest lane (LAST set_epi8 argument): set bits get 1-2 == -1.
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }
12586
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        // Zero mask: all lanes zeroed.
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set bits keep the difference (1-2 == -1); cleared bits zero the lane.
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }
12605
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        // Zero mask: result is `src` (`a`) unchanged.
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Mask bit 0 is the lowest lane (LAST set_epi8 argument): set bits get 1-2 == -1.
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }
12618
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Set bits keep the difference; cleared bits zero the lane (bit 0 = lowest lane).
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }
12631
12632    #[simd_test(enable = "avx512bw,avx512vl")]
12633    unsafe fn test_mm_mask_sub_epi8() {
12634        let a = _mm_set1_epi8(1);
12635        let b = _mm_set1_epi8(2);
12636        let r = _mm_mask_sub_epi8(a, 0, a, b);
12637        assert_eq_m128i(r, a);
12638        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
12639        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
12640        assert_eq_m128i(r, e);
12641    }
12642
12643    #[simd_test(enable = "avx512bw,avx512vl")]
12644    unsafe fn test_mm_maskz_sub_epi8() {
12645        let a = _mm_set1_epi8(1);
12646        let b = _mm_set1_epi8(2);
12647        let r = _mm_maskz_sub_epi8(0, a, b);
12648        assert_eq_m128i(r, _mm_setzero_si128());
12649        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
12650        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
12651        assert_eq_m128i(r, e);
12652    }
12653
12654    #[simd_test(enable = "avx512bw")]
12655    unsafe fn test_mm512_subs_epu16() {
12656        let a = _mm512_set1_epi16(1);
12657        let b = _mm512_set1_epi16(u16::MAX as i16);
12658        let r = _mm512_subs_epu16(a, b);
12659        let e = _mm512_set1_epi16(0);
12660        assert_eq_m512i(r, e);
12661    }
12662
    // Masked unsigned saturating subtract: 1 -(sat) u16::MAX == 0 in selected
    // lanes; unselected lanes keep src (all 1s).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        // Zero mask: result is src unchanged.
        let r = _mm512_mask_subs_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four lanes selected; they saturate to 0 (trailing entries, since
        // set_epi16 lists lanes high-to-low).
        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
12675
12676    #[simd_test(enable = "avx512bw")]
12677    unsafe fn test_mm512_maskz_subs_epu16() {
12678        let a = _mm512_set1_epi16(1);
12679        let b = _mm512_set1_epi16(u16::MAX as i16);
12680        let r = _mm512_maskz_subs_epu16(0, a, b);
12681        assert_eq_m512i(r, _mm512_setzero_si512());
12682        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
12683        #[rustfmt::skip]
12684        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12685                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12686        assert_eq_m512i(r, e);
12687    }
12688
12689    #[simd_test(enable = "avx512bw,avx512vl")]
12690    unsafe fn test_mm256_mask_subs_epu16() {
12691        let a = _mm256_set1_epi16(1);
12692        let b = _mm256_set1_epi16(u16::MAX as i16);
12693        let r = _mm256_mask_subs_epu16(a, 0, a, b);
12694        assert_eq_m256i(r, a);
12695        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
12696        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12697        assert_eq_m256i(r, e);
12698    }
12699
12700    #[simd_test(enable = "avx512bw,avx512vl")]
12701    unsafe fn test_mm256_maskz_subs_epu16() {
12702        let a = _mm256_set1_epi16(1);
12703        let b = _mm256_set1_epi16(u16::MAX as i16);
12704        let r = _mm256_maskz_subs_epu16(0, a, b);
12705        assert_eq_m256i(r, _mm256_setzero_si256());
12706        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
12707        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12708        assert_eq_m256i(r, e);
12709    }
12710
12711    #[simd_test(enable = "avx512bw,avx512vl")]
12712    unsafe fn test_mm_mask_subs_epu16() {
12713        let a = _mm_set1_epi16(1);
12714        let b = _mm_set1_epi16(u16::MAX as i16);
12715        let r = _mm_mask_subs_epu16(a, 0, a, b);
12716        assert_eq_m128i(r, a);
12717        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
12718        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
12719        assert_eq_m128i(r, e);
12720    }
12721
12722    #[simd_test(enable = "avx512bw,avx512vl")]
12723    unsafe fn test_mm_maskz_subs_epu16() {
12724        let a = _mm_set1_epi16(1);
12725        let b = _mm_set1_epi16(u16::MAX as i16);
12726        let r = _mm_maskz_subs_epu16(0, a, b);
12727        assert_eq_m128i(r, _mm_setzero_si128());
12728        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
12729        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
12730        assert_eq_m128i(r, e);
12731    }
12732
12733    #[simd_test(enable = "avx512bw")]
12734    unsafe fn test_mm512_subs_epu8() {
12735        let a = _mm512_set1_epi8(1);
12736        let b = _mm512_set1_epi8(u8::MAX as i8);
12737        let r = _mm512_subs_epu8(a, b);
12738        let e = _mm512_set1_epi8(0);
12739        assert_eq_m512i(r, e);
12740    }
12741
    // Masked unsigned saturating byte subtract: 1 -(sat) u8::MAX == 0 in the
    // selected lanes; unselected lanes keep src (all 1s).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        // Zero mask: result is src unchanged.
        let r = _mm512_mask_subs_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Only the four lowest lanes selected.
        let r = _mm512_mask_subs_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        // set_epi8 lists lanes high-to-low, so the selected low lanes are the
        // trailing zeros.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
12761
12762    #[simd_test(enable = "avx512bw")]
12763    unsafe fn test_mm512_maskz_subs_epu8() {
12764        let a = _mm512_set1_epi8(1);
12765        let b = _mm512_set1_epi8(u8::MAX as i8);
12766        let r = _mm512_maskz_subs_epu8(0, a, b);
12767        assert_eq_m512i(r, _mm512_setzero_si512());
12768        let r = _mm512_maskz_subs_epu8(
12769            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12770            a,
12771            b,
12772        );
12773        #[rustfmt::skip]
12774        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12775                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12776                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12777                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12778        assert_eq_m512i(r, e);
12779    }
12780
    // Masked unsigned saturating byte subtract: selected lanes clamp to 0,
    // unselected lanes keep src (all 1s).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        // Zero mask passes src through.
        let r = _mm256_mask_subs_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low four lanes selected (trailing entries — lanes are listed high-to-low).
        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
12793
12794    #[simd_test(enable = "avx512bw,avx512vl")]
12795    unsafe fn test_mm256_maskz_subs_epu8() {
12796        let a = _mm256_set1_epi8(1);
12797        let b = _mm256_set1_epi8(u8::MAX as i8);
12798        let r = _mm256_maskz_subs_epu8(0, a, b);
12799        assert_eq_m256i(r, _mm256_setzero_si256());
12800        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
12801        #[rustfmt::skip]
12802        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12803                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12804        assert_eq_m256i(r, e);
12805    }
12806
12807    #[simd_test(enable = "avx512bw,avx512vl")]
12808    unsafe fn test_mm_mask_subs_epu8() {
12809        let a = _mm_set1_epi8(1);
12810        let b = _mm_set1_epi8(u8::MAX as i8);
12811        let r = _mm_mask_subs_epu8(a, 0, a, b);
12812        assert_eq_m128i(r, a);
12813        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
12814        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12815        assert_eq_m128i(r, e);
12816    }
12817
12818    #[simd_test(enable = "avx512bw,avx512vl")]
12819    unsafe fn test_mm_maskz_subs_epu8() {
12820        let a = _mm_set1_epi8(1);
12821        let b = _mm_set1_epi8(u8::MAX as i8);
12822        let r = _mm_maskz_subs_epu8(0, a, b);
12823        assert_eq_m128i(r, _mm_setzero_si128());
12824        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
12825        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12826        assert_eq_m128i(r, e);
12827    }
12828
12829    #[simd_test(enable = "avx512bw")]
12830    unsafe fn test_mm512_subs_epi16() {
12831        let a = _mm512_set1_epi16(-1);
12832        let b = _mm512_set1_epi16(i16::MAX);
12833        let r = _mm512_subs_epi16(a, b);
12834        let e = _mm512_set1_epi16(i16::MIN);
12835        assert_eq_m512i(r, e);
12836    }
12837
    // Masked signed saturating subtract: -1 -(sat) i16::MAX == i16::MIN in the
    // selected lanes; unselected lanes keep src (all -1).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        // Zero mask passes src through.
        let r = _mm512_mask_subs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four lanes selected (trailing entries — set_epi16 lists high-to-low).
        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }
12850
    // Zero-masked signed saturating subtract: selected lanes saturate to
    // i16::MIN, unselected lanes are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        // All-zero mask zeroes the whole vector.
        let r = _mm512_maskz_subs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low four lanes selected (trailing entries — set_epi16 lists high-to-low).
        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }
12863
12864    #[simd_test(enable = "avx512bw,avx512vl")]
12865    unsafe fn test_mm256_mask_subs_epi16() {
12866        let a = _mm256_set1_epi16(-1);
12867        let b = _mm256_set1_epi16(i16::MAX);
12868        let r = _mm256_mask_subs_epi16(a, 0, a, b);
12869        assert_eq_m256i(r, a);
12870        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
12871        #[rustfmt::skip]
12872        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12873        assert_eq_m256i(r, e);
12874    }
12875
12876    #[simd_test(enable = "avx512bw,avx512vl")]
12877    unsafe fn test_mm256_maskz_subs_epi16() {
12878        let a = _mm256_set1_epi16(-1);
12879        let b = _mm256_set1_epi16(i16::MAX);
12880        let r = _mm256_maskz_subs_epi16(0, a, b);
12881        assert_eq_m256i(r, _mm256_setzero_si256());
12882        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
12883        #[rustfmt::skip]
12884        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12885        assert_eq_m256i(r, e);
12886    }
12887
12888    #[simd_test(enable = "avx512bw,avx512vl")]
12889    unsafe fn test_mm_mask_subs_epi16() {
12890        let a = _mm_set1_epi16(-1);
12891        let b = _mm_set1_epi16(i16::MAX);
12892        let r = _mm_mask_subs_epi16(a, 0, a, b);
12893        assert_eq_m128i(r, a);
12894        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
12895        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12896        assert_eq_m128i(r, e);
12897    }
12898
12899    #[simd_test(enable = "avx512bw,avx512vl")]
12900    unsafe fn test_mm_maskz_subs_epi16() {
12901        let a = _mm_set1_epi16(-1);
12902        let b = _mm_set1_epi16(i16::MAX);
12903        let r = _mm_maskz_subs_epi16(0, a, b);
12904        assert_eq_m128i(r, _mm_setzero_si128());
12905        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
12906        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12907        assert_eq_m128i(r, e);
12908    }
12909
12910    #[simd_test(enable = "avx512bw")]
12911    unsafe fn test_mm512_subs_epi8() {
12912        let a = _mm512_set1_epi8(-1);
12913        let b = _mm512_set1_epi8(i8::MAX);
12914        let r = _mm512_subs_epi8(a, b);
12915        let e = _mm512_set1_epi8(i8::MIN);
12916        assert_eq_m512i(r, e);
12917    }
12918
    // Masked signed saturating byte subtract: -1 -(sat) i8::MAX == i8::MIN in
    // selected lanes; unselected lanes keep src (all -1).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        // Zero mask passes src through.
        let r = _mm512_mask_subs_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Only the four lowest lanes selected.
        let r = _mm512_mask_subs_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        // Lanes are listed high-to-low, so the saturated low lanes trail the list.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }
12938
    // Zero-masked signed saturating byte subtract: selected lanes saturate to
    // i8::MIN, unselected lanes are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        // All-zero mask zeroes the whole vector.
        let r = _mm512_maskz_subs_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Only the four lowest lanes selected.
        let r = _mm512_maskz_subs_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }
12957
    // Masked signed saturating byte subtract: selected lanes clamp to i8::MIN,
    // unselected lanes keep src (all -1).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        // Zero mask passes src through.
        let r = _mm256_mask_subs_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low four lanes selected (trailing entries — lanes listed high-to-low).
        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }
12970
    // Zero-masked signed saturating byte subtract: selected lanes clamp to
    // i8::MIN, unselected lanes are zeroed.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        // All-zero mask zeroes the whole vector.
        let r = _mm256_maskz_subs_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }
12983
12984    #[simd_test(enable = "avx512bw,avx512vl")]
12985    unsafe fn test_mm_mask_subs_epi8() {
12986        let a = _mm_set1_epi8(-1);
12987        let b = _mm_set1_epi8(i8::MAX);
12988        let r = _mm_mask_subs_epi8(a, 0, a, b);
12989        assert_eq_m128i(r, a);
12990        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
12991        #[rustfmt::skip]
12992        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12993        assert_eq_m128i(r, e);
12994    }
12995
12996    #[simd_test(enable = "avx512bw,avx512vl")]
12997    unsafe fn test_mm_maskz_subs_epi8() {
12998        let a = _mm_set1_epi8(-1);
12999        let b = _mm_set1_epi8(i8::MAX);
13000        let r = _mm_maskz_subs_epi8(0, a, b);
13001        assert_eq_m128i(r, _mm_setzero_si128());
13002        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
13003        #[rustfmt::skip]
13004        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13005        assert_eq_m128i(r, e);
13006    }
13007
13008    #[simd_test(enable = "avx512bw")]
13009    unsafe fn test_mm512_mulhi_epu16() {
13010        let a = _mm512_set1_epi16(1);
13011        let b = _mm512_set1_epi16(1);
13012        let r = _mm512_mulhi_epu16(a, b);
13013        let e = _mm512_set1_epi16(0);
13014        assert_eq_m512i(r, e);
13015    }
13016
    // Masked unsigned high-half multiply: the high bits of 1 * 1 are 0 in the
    // selected lanes; unselected lanes keep src (all 1s).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask passes src through.
        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four lanes selected (trailing entries — set_epi16 lists high-to-low).
        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13029
13030    #[simd_test(enable = "avx512bw")]
13031    unsafe fn test_mm512_maskz_mulhi_epu16() {
13032        let a = _mm512_set1_epi16(1);
13033        let b = _mm512_set1_epi16(1);
13034        let r = _mm512_maskz_mulhi_epu16(0, a, b);
13035        assert_eq_m512i(r, _mm512_setzero_si512());
13036        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
13037        #[rustfmt::skip]
13038        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13039                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13040        assert_eq_m512i(r, e);
13041    }
13042
13043    #[simd_test(enable = "avx512bw,avx512vl")]
13044    unsafe fn test_mm256_mask_mulhi_epu16() {
13045        let a = _mm256_set1_epi16(1);
13046        let b = _mm256_set1_epi16(1);
13047        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
13048        assert_eq_m256i(r, a);
13049        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
13050        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13051        assert_eq_m256i(r, e);
13052    }
13053
13054    #[simd_test(enable = "avx512bw,avx512vl")]
13055    unsafe fn test_mm256_maskz_mulhi_epu16() {
13056        let a = _mm256_set1_epi16(1);
13057        let b = _mm256_set1_epi16(1);
13058        let r = _mm256_maskz_mulhi_epu16(0, a, b);
13059        assert_eq_m256i(r, _mm256_setzero_si256());
13060        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
13061        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13062        assert_eq_m256i(r, e);
13063    }
13064
13065    #[simd_test(enable = "avx512bw,avx512vl")]
13066    unsafe fn test_mm_mask_mulhi_epu16() {
13067        let a = _mm_set1_epi16(1);
13068        let b = _mm_set1_epi16(1);
13069        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
13070        assert_eq_m128i(r, a);
13071        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
13072        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13073        assert_eq_m128i(r, e);
13074    }
13075
13076    #[simd_test(enable = "avx512bw,avx512vl")]
13077    unsafe fn test_mm_maskz_mulhi_epu16() {
13078        let a = _mm_set1_epi16(1);
13079        let b = _mm_set1_epi16(1);
13080        let r = _mm_maskz_mulhi_epu16(0, a, b);
13081        assert_eq_m128i(r, _mm_setzero_si128());
13082        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
13083        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13084        assert_eq_m128i(r, e);
13085    }
13086
13087    #[simd_test(enable = "avx512bw")]
13088    unsafe fn test_mm512_mulhi_epi16() {
13089        let a = _mm512_set1_epi16(1);
13090        let b = _mm512_set1_epi16(1);
13091        let r = _mm512_mulhi_epi16(a, b);
13092        let e = _mm512_set1_epi16(0);
13093        assert_eq_m512i(r, e);
13094    }
13095
    // Masked signed high-half multiply: high bits of 1 * 1 are 0 in selected
    // lanes; unselected lanes keep src (all 1s).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask passes src through.
        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four lanes selected (trailing entries — set_epi16 lists high-to-low).
        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13108
13109    #[simd_test(enable = "avx512bw")]
13110    unsafe fn test_mm512_maskz_mulhi_epi16() {
13111        let a = _mm512_set1_epi16(1);
13112        let b = _mm512_set1_epi16(1);
13113        let r = _mm512_maskz_mulhi_epi16(0, a, b);
13114        assert_eq_m512i(r, _mm512_setzero_si512());
13115        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
13116        #[rustfmt::skip]
13117        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13118                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13119        assert_eq_m512i(r, e);
13120    }
13121
13122    #[simd_test(enable = "avx512bw,avx512vl")]
13123    unsafe fn test_mm256_mask_mulhi_epi16() {
13124        let a = _mm256_set1_epi16(1);
13125        let b = _mm256_set1_epi16(1);
13126        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
13127        assert_eq_m256i(r, a);
13128        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
13129        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13130        assert_eq_m256i(r, e);
13131    }
13132
13133    #[simd_test(enable = "avx512bw,avx512vl")]
13134    unsafe fn test_mm256_maskz_mulhi_epi16() {
13135        let a = _mm256_set1_epi16(1);
13136        let b = _mm256_set1_epi16(1);
13137        let r = _mm256_maskz_mulhi_epi16(0, a, b);
13138        assert_eq_m256i(r, _mm256_setzero_si256());
13139        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
13140        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13141        assert_eq_m256i(r, e);
13142    }
13143
13144    #[simd_test(enable = "avx512bw,avx512vl")]
13145    unsafe fn test_mm_mask_mulhi_epi16() {
13146        let a = _mm_set1_epi16(1);
13147        let b = _mm_set1_epi16(1);
13148        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
13149        assert_eq_m128i(r, a);
13150        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
13151        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13152        assert_eq_m128i(r, e);
13153    }
13154
13155    #[simd_test(enable = "avx512bw,avx512vl")]
13156    unsafe fn test_mm_maskz_mulhi_epi16() {
13157        let a = _mm_set1_epi16(1);
13158        let b = _mm_set1_epi16(1);
13159        let r = _mm_maskz_mulhi_epi16(0, a, b);
13160        assert_eq_m128i(r, _mm_setzero_si128());
13161        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
13162        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13163        assert_eq_m128i(r, e);
13164    }
13165
13166    #[simd_test(enable = "avx512bw")]
13167    unsafe fn test_mm512_mulhrs_epi16() {
13168        let a = _mm512_set1_epi16(1);
13169        let b = _mm512_set1_epi16(1);
13170        let r = _mm512_mulhrs_epi16(a, b);
13171        let e = _mm512_set1_epi16(0);
13172        assert_eq_m512i(r, e);
13173    }
13174
    // Masked rounding high-half multiply: 1 * 1 rounds to 0 in selected lanes;
    // unselected lanes keep src (all 1s).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask passes src through.
        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four lanes selected (trailing entries — set_epi16 lists high-to-low).
        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13187
13188    #[simd_test(enable = "avx512bw")]
13189    unsafe fn test_mm512_maskz_mulhrs_epi16() {
13190        let a = _mm512_set1_epi16(1);
13191        let b = _mm512_set1_epi16(1);
13192        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
13193        assert_eq_m512i(r, _mm512_setzero_si512());
13194        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
13195        #[rustfmt::skip]
13196        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13197                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13198        assert_eq_m512i(r, e);
13199    }
13200
13201    #[simd_test(enable = "avx512bw,avx512vl")]
13202    unsafe fn test_mm256_mask_mulhrs_epi16() {
13203        let a = _mm256_set1_epi16(1);
13204        let b = _mm256_set1_epi16(1);
13205        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
13206        assert_eq_m256i(r, a);
13207        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
13208        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13209        assert_eq_m256i(r, e);
13210    }
13211
13212    #[simd_test(enable = "avx512bw,avx512vl")]
13213    unsafe fn test_mm256_maskz_mulhrs_epi16() {
13214        let a = _mm256_set1_epi16(1);
13215        let b = _mm256_set1_epi16(1);
13216        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
13217        assert_eq_m256i(r, _mm256_setzero_si256());
13218        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
13219        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13220        assert_eq_m256i(r, e);
13221    }
13222
13223    #[simd_test(enable = "avx512bw,avx512vl")]
13224    unsafe fn test_mm_mask_mulhrs_epi16() {
13225        let a = _mm_set1_epi16(1);
13226        let b = _mm_set1_epi16(1);
13227        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
13228        assert_eq_m128i(r, a);
13229        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
13230        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13231        assert_eq_m128i(r, e);
13232    }
13233
13234    #[simd_test(enable = "avx512bw,avx512vl")]
13235    unsafe fn test_mm_maskz_mulhrs_epi16() {
13236        let a = _mm_set1_epi16(1);
13237        let b = _mm_set1_epi16(1);
13238        let r = _mm_maskz_mulhrs_epi16(0, a, b);
13239        assert_eq_m128i(r, _mm_setzero_si128());
13240        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
13241        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13242        assert_eq_m128i(r, e);
13243    }
13244
13245    #[simd_test(enable = "avx512bw")]
13246    unsafe fn test_mm512_mullo_epi16() {
13247        let a = _mm512_set1_epi16(1);
13248        let b = _mm512_set1_epi16(1);
13249        let r = _mm512_mullo_epi16(a, b);
13250        let e = _mm512_set1_epi16(1);
13251        assert_eq_m512i(r, e);
13252    }
13253
13254    #[simd_test(enable = "avx512bw")]
13255    unsafe fn test_mm512_mask_mullo_epi16() {
13256        let a = _mm512_set1_epi16(1);
13257        let b = _mm512_set1_epi16(1);
13258        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
13259        assert_eq_m512i(r, a);
13260        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13261        #[rustfmt::skip]
13262        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13263                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
13264        assert_eq_m512i(r, e);
13265    }
13266
    // Zero-masked low-half multiply: selected lanes compute 1 * 1 == 1,
    // unselected lanes are zeroed.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // All-zero mask zeroes the whole vector.
        let r = _mm512_maskz_mullo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low four lanes selected (trailing entries — set_epi16 lists high-to-low).
        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
13279
13280    #[simd_test(enable = "avx512bw,avx512vl")]
13281    unsafe fn test_mm256_mask_mullo_epi16() {
13282        let a = _mm256_set1_epi16(1);
13283        let b = _mm256_set1_epi16(1);
13284        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
13285        assert_eq_m256i(r, a);
13286        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
13287        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
13288        assert_eq_m256i(r, e);
13289    }
13290
13291    #[simd_test(enable = "avx512bw,avx512vl")]
13292    unsafe fn test_mm256_maskz_mullo_epi16() {
13293        let a = _mm256_set1_epi16(1);
13294        let b = _mm256_set1_epi16(1);
13295        let r = _mm256_maskz_mullo_epi16(0, a, b);
13296        assert_eq_m256i(r, _mm256_setzero_si256());
13297        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
13298        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
13299        assert_eq_m256i(r, e);
13300    }
13301
13302    #[simd_test(enable = "avx512bw,avx512vl")]
13303    unsafe fn test_mm_mask_mullo_epi16() {
13304        let a = _mm_set1_epi16(1);
13305        let b = _mm_set1_epi16(1);
13306        let r = _mm_mask_mullo_epi16(a, 0, a, b);
13307        assert_eq_m128i(r, a);
13308        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
13309        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
13310        assert_eq_m128i(r, e);
13311    }
13312
13313    #[simd_test(enable = "avx512bw,avx512vl")]
13314    unsafe fn test_mm_maskz_mullo_epi16() {
13315        let a = _mm_set1_epi16(1);
13316        let b = _mm_set1_epi16(1);
13317        let r = _mm_maskz_mullo_epi16(0, a, b);
13318        assert_eq_m128i(r, _mm_setzero_si128());
13319        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
13320        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
13321        assert_eq_m128i(r, e);
13322    }
13323
    // Unsigned 16-bit max: a counts up 0..15 per 16-lane half, b counts down
    // 15..0, so each lane yields the larger of a symmetric pair.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu16(a, b);
        // max(k, 15 - k) for each lane pair: descending 15..8, then ascending 8..15.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13338
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu16() {
        // Lane i of `a` counts up and lane i of `b` counts down, pairing i with 15 - i.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero writemask copies src (= a) into every lane.
        let r = _mm512_mask_max_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Set mask bits land on lanes where max(a, b) is a's own value; clear
        // bits keep src = a. Either way the result equals `a`.
        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13355
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu16() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane half.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask zeroes every lane.
        let r = _mm512_maskz_max_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set mask bits take the unsigned lane-wise max; clear bits force zero.
        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13372
13373    #[simd_test(enable = "avx512bw,avx512vl")]
13374    unsafe fn test_mm256_mask_max_epu16() {
13375        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13376        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13377        let r = _mm256_mask_max_epu16(a, 0, a, b);
13378        assert_eq_m256i(r, a);
13379        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
13380        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13381        assert_eq_m256i(r, e);
13382    }
13383
13384    #[simd_test(enable = "avx512bw,avx512vl")]
13385    unsafe fn test_mm256_maskz_max_epu16() {
13386        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13387        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13388        let r = _mm256_maskz_max_epu16(0, a, b);
13389        assert_eq_m256i(r, _mm256_setzero_si256());
13390        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
13391        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13392        assert_eq_m256i(r, e);
13393    }
13394
13395    #[simd_test(enable = "avx512bw,avx512vl")]
13396    unsafe fn test_mm_mask_max_epu16() {
13397        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13398        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13399        let r = _mm_mask_max_epu16(a, 0, a, b);
13400        assert_eq_m128i(r, a);
13401        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
13402        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13403        assert_eq_m128i(r, e);
13404    }
13405
13406    #[simd_test(enable = "avx512bw,avx512vl")]
13407    unsafe fn test_mm_maskz_max_epu16() {
13408        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13409        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13410        let r = _mm_maskz_max_epu16(0, a, b);
13411        assert_eq_m128i(r, _mm_setzero_si128());
13412        let r = _mm_maskz_max_epu16(0b00001111, a, b);
13413        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
13414        assert_eq_m128i(r, e);
13415    }
13416
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu8() {
        // Within each 16-lane group, lane i of `a` holds i and lane i of `b`
        // holds 15 - i (`set_epi8` lists the highest lane first).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu8(a, b);
        // Unsigned lane-wise max: every lane becomes max(i, 15 - i).
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13437
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu8() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane group.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero writemask copies src (= a) into every lane.
        let r = _mm512_mask_max_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Set mask bits land where max(a, b) equals a's own value; clear bits
        // keep src = a. Either way the result equals `a`.
        let r = _mm512_mask_max_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13465
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu8() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane group.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask zeroes every lane.
        let r = _mm512_maskz_max_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set mask bits take the unsigned lane-wise max; clear bits force zero.
        let r = _mm512_maskz_max_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13492
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epu8() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane half.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero writemask copies src (= a) through unchanged.
        let r = _mm256_mask_max_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Set mask bits land where max(a, b) equals a's value; clear bits keep
        // src = a, so the result is `a` either way.
        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
13509
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epu8() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane half.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask zeroes every lane.
        let r = _mm256_maskz_max_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Set mask bits take the unsigned lane-wise max; clear bits force zero.
        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
13526
13527    #[simd_test(enable = "avx512bw,avx512vl")]
13528    unsafe fn test_mm_mask_max_epu8() {
13529        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13530        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13531        let r = _mm_mask_max_epu8(a, 0, a, b);
13532        assert_eq_m128i(r, a);
13533        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
13534        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13535        assert_eq_m128i(r, e);
13536    }
13537
13538    #[simd_test(enable = "avx512bw,avx512vl")]
13539    unsafe fn test_mm_maskz_max_epu8() {
13540        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13541        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13542        let r = _mm_maskz_max_epu8(0, a, b);
13543        assert_eq_m128i(r, _mm_setzero_si128());
13544        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
13545        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13546        assert_eq_m128i(r, e);
13547    }
13548
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi16() {
        // Lane i of `a` holds i, lane i of `b` holds 15 - i, per 16-lane half.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi16(a, b);
        // Signed lane-wise max: every lane becomes max(i, 15 - i).
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13563
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi16() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane half.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero writemask copies src (= a) into every lane.
        let r = _mm512_mask_max_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Set mask bits land where max(a, b) equals a's own value; clear bits
        // keep src = a, so the result is `a` either way.
        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13580
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi16() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane half.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask zeroes every lane.
        let r = _mm512_maskz_max_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set mask bits take the signed lane-wise max; clear bits force zero.
        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13597
13598    #[simd_test(enable = "avx512bw,avx512vl")]
13599    unsafe fn test_mm256_mask_max_epi16() {
13600        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13601        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13602        let r = _mm256_mask_max_epi16(a, 0, a, b);
13603        assert_eq_m256i(r, a);
13604        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
13605        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13606        assert_eq_m256i(r, e);
13607    }
13608
13609    #[simd_test(enable = "avx512bw,avx512vl")]
13610    unsafe fn test_mm256_maskz_max_epi16() {
13611        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13612        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13613        let r = _mm256_maskz_max_epi16(0, a, b);
13614        assert_eq_m256i(r, _mm256_setzero_si256());
13615        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
13616        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13617        assert_eq_m256i(r, e);
13618    }
13619
13620    #[simd_test(enable = "avx512bw,avx512vl")]
13621    unsafe fn test_mm_mask_max_epi16() {
13622        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13623        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13624        let r = _mm_mask_max_epi16(a, 0, a, b);
13625        assert_eq_m128i(r, a);
13626        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
13627        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13628        assert_eq_m128i(r, e);
13629    }
13630
13631    #[simd_test(enable = "avx512bw,avx512vl")]
13632    unsafe fn test_mm_maskz_max_epi16() {
13633        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13634        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13635        let r = _mm_maskz_max_epi16(0, a, b);
13636        assert_eq_m128i(r, _mm_setzero_si128());
13637        let r = _mm_maskz_max_epi16(0b00001111, a, b);
13638        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
13639        assert_eq_m128i(r, e);
13640    }
13641
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi8() {
        // Lane i of `a` holds i and lane i of `b` holds 15 - i, per 16-lane group.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi8(a, b);
        // Signed lane-wise max: every lane becomes max(i, 15 - i).
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13662
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi8() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane group.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero writemask copies src (= a) into every lane.
        let r = _mm512_mask_max_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Set mask bits land where max(a, b) equals a's own value; clear bits
        // keep src = a, so the result is `a` either way.
        let r = _mm512_mask_max_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13690
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi8() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane group.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask zeroes every lane.
        let r = _mm512_maskz_max_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set mask bits take the signed lane-wise max; clear bits force zero.
        let r = _mm512_maskz_max_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
13717
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epi8() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane half.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero writemask copies src (= a) through unchanged.
        let r = _mm256_mask_max_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Set mask bits land where max(a, b) equals a's own value; clear bits
        // keep src = a, so the result is `a` either way.
        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
13734
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epi8() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane half.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask zeroes every lane.
        let r = _mm256_maskz_max_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Set mask bits take the signed lane-wise max; clear bits force zero.
        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
13751
13752    #[simd_test(enable = "avx512bw,avx512vl")]
13753    unsafe fn test_mm_mask_max_epi8() {
13754        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13755        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13756        let r = _mm_mask_max_epi8(a, 0, a, b);
13757        assert_eq_m128i(r, a);
13758        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
13759        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13760        assert_eq_m128i(r, e);
13761    }
13762
13763    #[simd_test(enable = "avx512bw,avx512vl")]
13764    unsafe fn test_mm_maskz_max_epi8() {
13765        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13766        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13767        let r = _mm_maskz_max_epi8(0, a, b);
13768        assert_eq_m128i(r, _mm_setzero_si128());
13769        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
13770        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13771        assert_eq_m128i(r, e);
13772    }
13773
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu16() {
        // Lane i of `a` holds i, lane i of `b` holds 15 - i, per 16-lane half.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu16(a, b);
        // Unsigned lane-wise min: every lane becomes min(i, 15 - i).
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13788
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu16() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane half.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero writemask copies src (= a) into every lane.
        let r = _mm512_mask_min_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Set mask bits take the unsigned lane-wise min; clear bits keep src = a.
        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13805
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epu16() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane half.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask zeroes every lane.
        let r = _mm512_maskz_min_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set mask bits take the unsigned lane-wise min; clear bits force zero.
        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13822
13823    #[simd_test(enable = "avx512bw,avx512vl")]
13824    unsafe fn test_mm256_mask_min_epu16() {
13825        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13826        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13827        let r = _mm256_mask_min_epu16(a, 0, a, b);
13828        assert_eq_m256i(r, a);
13829        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
13830        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13831        assert_eq_m256i(r, e);
13832    }
13833
13834    #[simd_test(enable = "avx512bw,avx512vl")]
13835    unsafe fn test_mm256_maskz_min_epu16() {
13836        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13837        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13838        let r = _mm256_maskz_min_epu16(0, a, b);
13839        assert_eq_m256i(r, _mm256_setzero_si256());
13840        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
13841        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13842        assert_eq_m256i(r, e);
13843    }
13844
13845    #[simd_test(enable = "avx512bw,avx512vl")]
13846    unsafe fn test_mm_mask_min_epu16() {
13847        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13848        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13849        let r = _mm_mask_min_epu16(a, 0, a, b);
13850        assert_eq_m128i(r, a);
13851        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
13852        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
13853        assert_eq_m128i(r, e);
13854    }
13855
13856    #[simd_test(enable = "avx512bw,avx512vl")]
13857    unsafe fn test_mm_maskz_min_epu16() {
13858        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13859        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13860        let r = _mm_maskz_min_epu16(0, a, b);
13861        assert_eq_m128i(r, _mm_setzero_si128());
13862        let r = _mm_maskz_min_epu16(0b00001111, a, b);
13863        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
13864        assert_eq_m128i(r, e);
13865    }
13866
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu8() {
        // Lane i of `a` holds i and lane i of `b` holds 15 - i, per 16-lane group.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu8(a, b);
        // Unsigned lane-wise min: every lane becomes min(i, 15 - i).
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13887
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu8() {
        // Lane i pairs a = i with b = 15 - i within each 16-lane group.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero writemask copies src (= a) into every lane.
        let r = _mm512_mask_min_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Set mask bits take the unsigned lane-wise min; clear bits keep src = a.
        let r = _mm512_mask_min_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
13915
13916    #[simd_test(enable = "avx512bw")]
13917    unsafe fn test_mm512_maskz_min_epu8() {
13918        #[rustfmt::skip]
13919        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13920                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13921                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13922                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13923        #[rustfmt::skip]
13924        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13925                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13926                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13927                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13928        let r = _mm512_maskz_min_epu8(0, a, b);
13929        assert_eq_m512i(r, _mm512_setzero_si512());
13930        let r = _mm512_maskz_min_epu8(
13931            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13932            a,
13933            b,
13934        );
13935        #[rustfmt::skip]
13936        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13937                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13938                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13939                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13940        assert_eq_m512i(r, e);
13941    }
13942
13943    #[simd_test(enable = "avx512bw,avx512vl")]
13944    unsafe fn test_mm256_mask_min_epu8() {
13945        #[rustfmt::skip]
13946        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13947                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13948        #[rustfmt::skip]
13949        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13950                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13951        let r = _mm256_mask_min_epu8(a, 0, a, b);
13952        assert_eq_m256i(r, a);
13953        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
13954        #[rustfmt::skip]
13955        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13956                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13957        assert_eq_m256i(r, e);
13958    }
13959
13960    #[simd_test(enable = "avx512bw,avx512vl")]
13961    unsafe fn test_mm256_maskz_min_epu8() {
13962        #[rustfmt::skip]
13963        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13964                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13965        #[rustfmt::skip]
13966        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13967                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13968        let r = _mm256_maskz_min_epu8(0, a, b);
13969        assert_eq_m256i(r, _mm256_setzero_si256());
13970        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
13971        #[rustfmt::skip]
13972        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13973                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13974        assert_eq_m256i(r, e);
13975    }
13976
13977    #[simd_test(enable = "avx512bw,avx512vl")]
13978    unsafe fn test_mm_mask_min_epu8() {
13979        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13980        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13981        let r = _mm_mask_min_epu8(a, 0, a, b);
13982        assert_eq_m128i(r, a);
13983        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
13984        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13985        assert_eq_m128i(r, e);
13986    }
13987
13988    #[simd_test(enable = "avx512bw,avx512vl")]
13989    unsafe fn test_mm_maskz_min_epu8() {
13990        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13991        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13992        let r = _mm_maskz_min_epu8(0, a, b);
13993        assert_eq_m128i(r, _mm_setzero_si128());
13994        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
13995        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13996        assert_eq_m128i(r, e);
13997    }
13998
    // min_epi16 family: lane-wise signed minimum of packed 16-bit integers,
    // with merge-masked and zeroing-masked variants at 512/256/128-bit widths.
    // Inputs are ascending vs. descending ramps, so the minimum is 0..,..0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: result is src (`a`) unchanged.
        let r = _mm512_mask_min_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
        // Unselected lanes are zeroed; selected lanes hold min(a, b).
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
14091
    // min_epi8 family: lane-wise signed minimum of packed 8-bit integers,
    // with merge-masked and zeroing-masked variants at 512/256/128-bit widths.
    // Same ascending/descending ramp fixtures as the epu8 tests above; all
    // values are non-negative, so signed and unsigned results coincide here.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: result is src (`a`) unchanged.
        let r = _mm512_mask_min_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        // Unselected lanes are zeroed; selected lanes hold min(a, b).
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
14223
    // cmplt_epu16/cmplt_epu8 families: unsigned less-than compares producing
    // a lane bitmask. The fixtures use -2 and -1, whose bit patterns
    // reinterpret as the large unsigned values 0x..FE < 0x..FF, so every lane
    // compares true; the mask_ variants AND the result with the input mask.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
        // Compare is all-true, so the result equals the input mask.
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
14331
    // cmplt_epi16/cmplt_epi8 families: signed less-than compares producing a
    // lane bitmask. -2 < -1 in every lane, so the unmasked result is all-ones
    // and the mask_ variants return the input mask unchanged.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
14439
    // cmpgt_epu16/cmpgt_epu8 families: unsigned greater-than compares
    // producing a lane bitmask. 2 > 1 in every lane, so the unmasked result
    // is all-ones and the mask_ variants return the input mask unchanged.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpgt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
14547
    // cmpgt_epi16 family: signed greater-than compares producing a lane
    // bitmask. 2 > -1 (signed) in every lane, so the result is all-ones.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
14572
14573    #[simd_test(enable = "avx512bw,avx512vl")]
14574    unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
14575        let a = _mm256_set1_epi16(2);
14576        let b = _mm256_set1_epi16(-1);
14577        let mask = 0b001010101_01010101;
14578        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
14579        assert_eq!(r, 0b01010101_01010101);
14580    }
14581
14582    #[simd_test(enable = "avx512bw,avx512vl")]
14583    unsafe fn test_mm_cmpgt_epi16_mask() {
14584        let a = _mm_set1_epi16(2);
14585        let b = _mm_set1_epi16(-1);
14586        let m = _mm_cmpgt_epi16_mask(a, b);
14587        assert_eq!(m, 0b11111111);
14588    }
14589
14590    #[simd_test(enable = "avx512bw,avx512vl")]
14591    unsafe fn test_mm_mask_cmpgt_epi16_mask() {
14592        let a = _mm_set1_epi16(2);
14593        let b = _mm_set1_epi16(-1);
14594        let mask = 0b01010101;
14595        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
14596        assert_eq!(r, 0b01010101);
14597    }
14598
14599    #[simd_test(enable = "avx512bw")]
14600    unsafe fn test_mm512_cmpgt_epi8_mask() {
14601        let a = _mm512_set1_epi8(2);
14602        let b = _mm512_set1_epi8(-1);
14603        let m = _mm512_cmpgt_epi8_mask(a, b);
14604        assert_eq!(
14605            m,
14606            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14607        );
14608    }
14609
14610    #[simd_test(enable = "avx512bw")]
14611    unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
14612        let a = _mm512_set1_epi8(2);
14613        let b = _mm512_set1_epi8(-1);
14614        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14615        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
14616        assert_eq!(
14617            r,
14618            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14619        );
14620    }
14621
14622    #[simd_test(enable = "avx512bw,avx512vl")]
14623    unsafe fn test_mm256_cmpgt_epi8_mask() {
14624        let a = _mm256_set1_epi8(2);
14625        let b = _mm256_set1_epi8(-1);
14626        let m = _mm256_cmpgt_epi8_mask(a, b);
14627        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14628    }
14629
14630    #[simd_test(enable = "avx512bw,avx512vl")]
14631    unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
14632        let a = _mm256_set1_epi8(2);
14633        let b = _mm256_set1_epi8(-1);
14634        let mask = 0b01010101_01010101_01010101_01010101;
14635        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
14636        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14637    }
14638
14639    #[simd_test(enable = "avx512bw,avx512vl")]
14640    unsafe fn test_mm_cmpgt_epi8_mask() {
14641        let a = _mm_set1_epi8(2);
14642        let b = _mm_set1_epi8(-1);
14643        let m = _mm_cmpgt_epi8_mask(a, b);
14644        assert_eq!(m, 0b11111111_11111111);
14645    }
14646
14647    #[simd_test(enable = "avx512bw,avx512vl")]
14648    unsafe fn test_mm_mask_cmpgt_epi8_mask() {
14649        let a = _mm_set1_epi8(2);
14650        let b = _mm_set1_epi8(-1);
14651        let mask = 0b01010101_01010101;
14652        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
14653        assert_eq!(r, 0b01010101_01010101);
14654    }
14655
14656    #[simd_test(enable = "avx512bw")]
14657    unsafe fn test_mm512_cmple_epu16_mask() {
14658        let a = _mm512_set1_epi16(-1);
14659        let b = _mm512_set1_epi16(-1);
14660        let m = _mm512_cmple_epu16_mask(a, b);
14661        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14662    }
14663
14664    #[simd_test(enable = "avx512bw")]
14665    unsafe fn test_mm512_mask_cmple_epu16_mask() {
14666        let a = _mm512_set1_epi16(-1);
14667        let b = _mm512_set1_epi16(-1);
14668        let mask = 0b01010101_01010101_01010101_01010101;
14669        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
14670        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14671    }
14672
14673    #[simd_test(enable = "avx512bw,avx512vl")]
14674    unsafe fn test_mm256_cmple_epu16_mask() {
14675        let a = _mm256_set1_epi16(-1);
14676        let b = _mm256_set1_epi16(-1);
14677        let m = _mm256_cmple_epu16_mask(a, b);
14678        assert_eq!(m, 0b11111111_11111111);
14679    }
14680
14681    #[simd_test(enable = "avx512bw,avx512vl")]
14682    unsafe fn test_mm256_mask_cmple_epu16_mask() {
14683        let a = _mm256_set1_epi16(-1);
14684        let b = _mm256_set1_epi16(-1);
14685        let mask = 0b01010101_01010101;
14686        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
14687        assert_eq!(r, 0b01010101_01010101);
14688    }
14689
14690    #[simd_test(enable = "avx512bw,avx512vl")]
14691    unsafe fn test_mm_cmple_epu16_mask() {
14692        let a = _mm_set1_epi16(-1);
14693        let b = _mm_set1_epi16(-1);
14694        let m = _mm_cmple_epu16_mask(a, b);
14695        assert_eq!(m, 0b11111111);
14696    }
14697
14698    #[simd_test(enable = "avx512bw,avx512vl")]
14699    unsafe fn test_mm_mask_cmple_epu16_mask() {
14700        let a = _mm_set1_epi16(-1);
14701        let b = _mm_set1_epi16(-1);
14702        let mask = 0b01010101;
14703        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
14704        assert_eq!(r, 0b01010101);
14705    }
14706
14707    #[simd_test(enable = "avx512bw")]
14708    unsafe fn test_mm512_cmple_epu8_mask() {
14709        let a = _mm512_set1_epi8(-1);
14710        let b = _mm512_set1_epi8(-1);
14711        let m = _mm512_cmple_epu8_mask(a, b);
14712        assert_eq!(
14713            m,
14714            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14715        );
14716    }
14717
14718    #[simd_test(enable = "avx512bw")]
14719    unsafe fn test_mm512_mask_cmple_epu8_mask() {
14720        let a = _mm512_set1_epi8(-1);
14721        let b = _mm512_set1_epi8(-1);
14722        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14723        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
14724        assert_eq!(
14725            r,
14726            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14727        );
14728    }
14729
14730    #[simd_test(enable = "avx512bw,avx512vl")]
14731    unsafe fn test_mm256_cmple_epu8_mask() {
14732        let a = _mm256_set1_epi8(-1);
14733        let b = _mm256_set1_epi8(-1);
14734        let m = _mm256_cmple_epu8_mask(a, b);
14735        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14736    }
14737
14738    #[simd_test(enable = "avx512bw,avx512vl")]
14739    unsafe fn test_mm256_mask_cmple_epu8_mask() {
14740        let a = _mm256_set1_epi8(-1);
14741        let b = _mm256_set1_epi8(-1);
14742        let mask = 0b01010101_01010101_01010101_01010101;
14743        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
14744        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14745    }
14746
14747    #[simd_test(enable = "avx512bw,avx512vl")]
14748    unsafe fn test_mm_cmple_epu8_mask() {
14749        let a = _mm_set1_epi8(-1);
14750        let b = _mm_set1_epi8(-1);
14751        let m = _mm_cmple_epu8_mask(a, b);
14752        assert_eq!(m, 0b11111111_11111111);
14753    }
14754
14755    #[simd_test(enable = "avx512bw,avx512vl")]
14756    unsafe fn test_mm_mask_cmple_epu8_mask() {
14757        let a = _mm_set1_epi8(-1);
14758        let b = _mm_set1_epi8(-1);
14759        let mask = 0b01010101_01010101;
14760        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
14761        assert_eq!(r, 0b01010101_01010101);
14762    }
14763
14764    #[simd_test(enable = "avx512bw")]
14765    unsafe fn test_mm512_cmple_epi16_mask() {
14766        let a = _mm512_set1_epi16(-1);
14767        let b = _mm512_set1_epi16(-1);
14768        let m = _mm512_cmple_epi16_mask(a, b);
14769        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14770    }
14771
14772    #[simd_test(enable = "avx512bw")]
14773    unsafe fn test_mm512_mask_cmple_epi16_mask() {
14774        let a = _mm512_set1_epi16(-1);
14775        let b = _mm512_set1_epi16(-1);
14776        let mask = 0b01010101_01010101_01010101_01010101;
14777        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
14778        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14779    }
14780
14781    #[simd_test(enable = "avx512bw,avx512vl")]
14782    unsafe fn test_mm256_cmple_epi16_mask() {
14783        let a = _mm256_set1_epi16(-1);
14784        let b = _mm256_set1_epi16(-1);
14785        let m = _mm256_cmple_epi16_mask(a, b);
14786        assert_eq!(m, 0b11111111_11111111);
14787    }
14788
14789    #[simd_test(enable = "avx512bw,avx512vl")]
14790    unsafe fn test_mm256_mask_cmple_epi16_mask() {
14791        let a = _mm256_set1_epi16(-1);
14792        let b = _mm256_set1_epi16(-1);
14793        let mask = 0b01010101_01010101;
14794        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
14795        assert_eq!(r, 0b01010101_01010101);
14796    }
14797
14798    #[simd_test(enable = "avx512bw,avx512vl")]
14799    unsafe fn test_mm_cmple_epi16_mask() {
14800        let a = _mm_set1_epi16(-1);
14801        let b = _mm_set1_epi16(-1);
14802        let m = _mm_cmple_epi16_mask(a, b);
14803        assert_eq!(m, 0b11111111);
14804    }
14805
14806    #[simd_test(enable = "avx512bw,avx512vl")]
14807    unsafe fn test_mm_mask_cmple_epi16_mask() {
14808        let a = _mm_set1_epi16(-1);
14809        let b = _mm_set1_epi16(-1);
14810        let mask = 0b01010101;
14811        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
14812        assert_eq!(r, 0b01010101);
14813    }
14814
14815    #[simd_test(enable = "avx512bw")]
14816    unsafe fn test_mm512_cmple_epi8_mask() {
14817        let a = _mm512_set1_epi8(-1);
14818        let b = _mm512_set1_epi8(-1);
14819        let m = _mm512_cmple_epi8_mask(a, b);
14820        assert_eq!(
14821            m,
14822            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14823        );
14824    }
14825
14826    #[simd_test(enable = "avx512bw")]
14827    unsafe fn test_mm512_mask_cmple_epi8_mask() {
14828        let a = _mm512_set1_epi8(-1);
14829        let b = _mm512_set1_epi8(-1);
14830        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14831        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
14832        assert_eq!(
14833            r,
14834            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14835        );
14836    }
14837
14838    #[simd_test(enable = "avx512bw,avx512vl")]
14839    unsafe fn test_mm256_cmple_epi8_mask() {
14840        let a = _mm256_set1_epi8(-1);
14841        let b = _mm256_set1_epi8(-1);
14842        let m = _mm256_cmple_epi8_mask(a, b);
14843        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14844    }
14845
14846    #[simd_test(enable = "avx512bw,avx512vl")]
14847    unsafe fn test_mm256_mask_cmple_epi8_mask() {
14848        let a = _mm256_set1_epi8(-1);
14849        let b = _mm256_set1_epi8(-1);
14850        let mask = 0b01010101_01010101_01010101_01010101;
14851        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
14852        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14853    }
14854
14855    #[simd_test(enable = "avx512bw,avx512vl")]
14856    unsafe fn test_mm_cmple_epi8_mask() {
14857        let a = _mm_set1_epi8(-1);
14858        let b = _mm_set1_epi8(-1);
14859        let m = _mm_cmple_epi8_mask(a, b);
14860        assert_eq!(m, 0b11111111_11111111);
14861    }
14862
14863    #[simd_test(enable = "avx512bw,avx512vl")]
14864    unsafe fn test_mm_mask_cmple_epi8_mask() {
14865        let a = _mm_set1_epi8(-1);
14866        let b = _mm_set1_epi8(-1);
14867        let mask = 0b01010101_01010101;
14868        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
14869        assert_eq!(r, 0b01010101_01010101);
14870    }
14871
14872    #[simd_test(enable = "avx512bw")]
14873    unsafe fn test_mm512_cmpge_epu16_mask() {
14874        let a = _mm512_set1_epi16(1);
14875        let b = _mm512_set1_epi16(1);
14876        let m = _mm512_cmpge_epu16_mask(a, b);
14877        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14878    }
14879
14880    #[simd_test(enable = "avx512bw")]
14881    unsafe fn test_mm512_mask_cmpge_epu16_mask() {
14882        let a = _mm512_set1_epi16(1);
14883        let b = _mm512_set1_epi16(1);
14884        let mask = 0b01010101_01010101_01010101_01010101;
14885        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
14886        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14887    }
14888
14889    #[simd_test(enable = "avx512bw,avx512vl")]
14890    unsafe fn test_mm256_cmpge_epu16_mask() {
14891        let a = _mm256_set1_epi16(1);
14892        let b = _mm256_set1_epi16(1);
14893        let m = _mm256_cmpge_epu16_mask(a, b);
14894        assert_eq!(m, 0b11111111_11111111);
14895    }
14896
14897    #[simd_test(enable = "avx512bw,avx512vl")]
14898    unsafe fn test_mm256_mask_cmpge_epu16_mask() {
14899        let a = _mm256_set1_epi16(1);
14900        let b = _mm256_set1_epi16(1);
14901        let mask = 0b01010101_01010101;
14902        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
14903        assert_eq!(r, 0b01010101_01010101);
14904    }
14905
14906    #[simd_test(enable = "avx512bw,avx512vl")]
14907    unsafe fn test_mm_cmpge_epu16_mask() {
14908        let a = _mm_set1_epi16(1);
14909        let b = _mm_set1_epi16(1);
14910        let m = _mm_cmpge_epu16_mask(a, b);
14911        assert_eq!(m, 0b11111111);
14912    }
14913
14914    #[simd_test(enable = "avx512bw,avx512vl")]
14915    unsafe fn test_mm_mask_cmpge_epu16_mask() {
14916        let a = _mm_set1_epi16(1);
14917        let b = _mm_set1_epi16(1);
14918        let mask = 0b01010101;
14919        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
14920        assert_eq!(r, 0b01010101);
14921    }
14922
14923    #[simd_test(enable = "avx512bw")]
14924    unsafe fn test_mm512_cmpge_epu8_mask() {
14925        let a = _mm512_set1_epi8(1);
14926        let b = _mm512_set1_epi8(1);
14927        let m = _mm512_cmpge_epu8_mask(a, b);
14928        assert_eq!(
14929            m,
14930            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14931        );
14932    }
14933
14934    #[simd_test(enable = "avx512bw")]
14935    unsafe fn test_mm512_mask_cmpge_epu8_mask() {
14936        let a = _mm512_set1_epi8(1);
14937        let b = _mm512_set1_epi8(1);
14938        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14939        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
14940        assert_eq!(
14941            r,
14942            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14943        );
14944    }
14945
14946    #[simd_test(enable = "avx512bw,avx512vl")]
14947    unsafe fn test_mm256_cmpge_epu8_mask() {
14948        let a = _mm256_set1_epi8(1);
14949        let b = _mm256_set1_epi8(1);
14950        let m = _mm256_cmpge_epu8_mask(a, b);
14951        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14952    }
14953
14954    #[simd_test(enable = "avx512bw,avx512vl")]
14955    unsafe fn test_mm256_mask_cmpge_epu8_mask() {
14956        let a = _mm256_set1_epi8(1);
14957        let b = _mm256_set1_epi8(1);
14958        let mask = 0b01010101_01010101_01010101_01010101;
14959        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
14960        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14961    }
14962
14963    #[simd_test(enable = "avx512bw,avx512vl")]
14964    unsafe fn test_mm_cmpge_epu8_mask() {
14965        let a = _mm_set1_epi8(1);
14966        let b = _mm_set1_epi8(1);
14967        let m = _mm_cmpge_epu8_mask(a, b);
14968        assert_eq!(m, 0b11111111_11111111);
14969    }
14970
14971    #[simd_test(enable = "avx512bw,avx512vl")]
14972    unsafe fn test_mm_mask_cmpge_epu8_mask() {
14973        let a = _mm_set1_epi8(1);
14974        let b = _mm_set1_epi8(1);
14975        let mask = 0b01010101_01010101;
14976        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
14977        assert_eq!(r, 0b01010101_01010101);
14978    }
14979
14980    #[simd_test(enable = "avx512bw")]
14981    unsafe fn test_mm512_cmpge_epi16_mask() {
14982        let a = _mm512_set1_epi16(-1);
14983        let b = _mm512_set1_epi16(-1);
14984        let m = _mm512_cmpge_epi16_mask(a, b);
14985        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14986    }
14987
14988    #[simd_test(enable = "avx512bw")]
14989    unsafe fn test_mm512_mask_cmpge_epi16_mask() {
14990        let a = _mm512_set1_epi16(-1);
14991        let b = _mm512_set1_epi16(-1);
14992        let mask = 0b01010101_01010101_01010101_01010101;
14993        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
14994        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14995    }
14996
14997    #[simd_test(enable = "avx512bw,avx512vl")]
14998    unsafe fn test_mm256_cmpge_epi16_mask() {
14999        let a = _mm256_set1_epi16(-1);
15000        let b = _mm256_set1_epi16(-1);
15001        let m = _mm256_cmpge_epi16_mask(a, b);
15002        assert_eq!(m, 0b11111111_11111111);
15003    }
15004
15005    #[simd_test(enable = "avx512bw,avx512vl")]
15006    unsafe fn test_mm256_mask_cmpge_epi16_mask() {
15007        let a = _mm256_set1_epi16(-1);
15008        let b = _mm256_set1_epi16(-1);
15009        let mask = 0b01010101_01010101;
15010        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
15011        assert_eq!(r, 0b01010101_01010101);
15012    }
15013
15014    #[simd_test(enable = "avx512bw,avx512vl")]
15015    unsafe fn test_mm_cmpge_epi16_mask() {
15016        let a = _mm_set1_epi16(-1);
15017        let b = _mm_set1_epi16(-1);
15018        let m = _mm_cmpge_epi16_mask(a, b);
15019        assert_eq!(m, 0b11111111);
15020    }
15021
15022    #[simd_test(enable = "avx512bw,avx512vl")]
15023    unsafe fn test_mm_mask_cmpge_epi16_mask() {
15024        let a = _mm_set1_epi16(-1);
15025        let b = _mm_set1_epi16(-1);
15026        let mask = 0b01010101;
15027        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
15028        assert_eq!(r, 0b01010101);
15029    }
15030
15031    #[simd_test(enable = "avx512bw")]
15032    unsafe fn test_mm512_cmpge_epi8_mask() {
15033        let a = _mm512_set1_epi8(-1);
15034        let b = _mm512_set1_epi8(-1);
15035        let m = _mm512_cmpge_epi8_mask(a, b);
15036        assert_eq!(
15037            m,
15038            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15039        );
15040    }
15041
15042    #[simd_test(enable = "avx512bw")]
15043    unsafe fn test_mm512_mask_cmpge_epi8_mask() {
15044        let a = _mm512_set1_epi8(-1);
15045        let b = _mm512_set1_epi8(-1);
15046        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15047        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
15048        assert_eq!(
15049            r,
15050            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15051        );
15052    }
15053
15054    #[simd_test(enable = "avx512bw,avx512vl")]
15055    unsafe fn test_mm256_cmpge_epi8_mask() {
15056        let a = _mm256_set1_epi8(-1);
15057        let b = _mm256_set1_epi8(-1);
15058        let m = _mm256_cmpge_epi8_mask(a, b);
15059        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15060    }
15061
15062    #[simd_test(enable = "avx512bw,avx512vl")]
15063    unsafe fn test_mm256_mask_cmpge_epi8_mask() {
15064        let a = _mm256_set1_epi8(-1);
15065        let b = _mm256_set1_epi8(-1);
15066        let mask = 0b01010101_01010101_01010101_01010101;
15067        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
15068        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15069    }
15070
15071    #[simd_test(enable = "avx512bw,avx512vl")]
15072    unsafe fn test_mm_cmpge_epi8_mask() {
15073        let a = _mm_set1_epi8(-1);
15074        let b = _mm_set1_epi8(-1);
15075        let m = _mm_cmpge_epi8_mask(a, b);
15076        assert_eq!(m, 0b11111111_11111111);
15077    }
15078
15079    #[simd_test(enable = "avx512bw,avx512vl")]
15080    unsafe fn test_mm_mask_cmpge_epi8_mask() {
15081        let a = _mm_set1_epi8(-1);
15082        let b = _mm_set1_epi8(-1);
15083        let mask = 0b01010101_01010101;
15084        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
15085        assert_eq!(r, 0b01010101_01010101);
15086    }
15087
15088    #[simd_test(enable = "avx512bw")]
15089    unsafe fn test_mm512_cmpeq_epu16_mask() {
15090        let a = _mm512_set1_epi16(1);
15091        let b = _mm512_set1_epi16(1);
15092        let m = _mm512_cmpeq_epu16_mask(a, b);
15093        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15094    }
15095
15096    #[simd_test(enable = "avx512bw")]
15097    unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
15098        let a = _mm512_set1_epi16(1);
15099        let b = _mm512_set1_epi16(1);
15100        let mask = 0b01010101_01010101_01010101_01010101;
15101        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
15102        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15103    }
15104
15105    #[simd_test(enable = "avx512bw,avx512vl")]
15106    unsafe fn test_mm256_cmpeq_epu16_mask() {
15107        let a = _mm256_set1_epi16(1);
15108        let b = _mm256_set1_epi16(1);
15109        let m = _mm256_cmpeq_epu16_mask(a, b);
15110        assert_eq!(m, 0b11111111_11111111);
15111    }
15112
15113    #[simd_test(enable = "avx512bw,avx512vl")]
15114    unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
15115        let a = _mm256_set1_epi16(1);
15116        let b = _mm256_set1_epi16(1);
15117        let mask = 0b01010101_01010101;
15118        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
15119        assert_eq!(r, 0b01010101_01010101);
15120    }
15121
15122    #[simd_test(enable = "avx512bw,avx512vl")]
15123    unsafe fn test_mm_cmpeq_epu16_mask() {
15124        let a = _mm_set1_epi16(1);
15125        let b = _mm_set1_epi16(1);
15126        let m = _mm_cmpeq_epu16_mask(a, b);
15127        assert_eq!(m, 0b11111111);
15128    }
15129
15130    #[simd_test(enable = "avx512bw,avx512vl")]
15131    unsafe fn test_mm_mask_cmpeq_epu16_mask() {
15132        let a = _mm_set1_epi16(1);
15133        let b = _mm_set1_epi16(1);
15134        let mask = 0b01010101;
15135        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
15136        assert_eq!(r, 0b01010101);
15137    }
15138
15139    #[simd_test(enable = "avx512bw")]
15140    unsafe fn test_mm512_cmpeq_epu8_mask() {
15141        let a = _mm512_set1_epi8(1);
15142        let b = _mm512_set1_epi8(1);
15143        let m = _mm512_cmpeq_epu8_mask(a, b);
15144        assert_eq!(
15145            m,
15146            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15147        );
15148    }
15149
15150    #[simd_test(enable = "avx512bw")]
15151    unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
15152        let a = _mm512_set1_epi8(1);
15153        let b = _mm512_set1_epi8(1);
15154        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15155        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
15156        assert_eq!(
15157            r,
15158            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15159        );
15160    }
15161
15162    #[simd_test(enable = "avx512bw,avx512vl")]
15163    unsafe fn test_mm256_cmpeq_epu8_mask() {
15164        let a = _mm256_set1_epi8(1);
15165        let b = _mm256_set1_epi8(1);
15166        let m = _mm256_cmpeq_epu8_mask(a, b);
15167        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15168    }
15169
15170    #[simd_test(enable = "avx512bw,avx512vl")]
15171    unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
15172        let a = _mm256_set1_epi8(1);
15173        let b = _mm256_set1_epi8(1);
15174        let mask = 0b01010101_01010101_01010101_01010101;
15175        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
15176        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15177    }
15178
15179    #[simd_test(enable = "avx512bw,avx512vl")]
15180    unsafe fn test_mm_cmpeq_epu8_mask() {
15181        let a = _mm_set1_epi8(1);
15182        let b = _mm_set1_epi8(1);
15183        let m = _mm_cmpeq_epu8_mask(a, b);
15184        assert_eq!(m, 0b11111111_11111111);
15185    }
15186
15187    #[simd_test(enable = "avx512bw,avx512vl")]
15188    unsafe fn test_mm_mask_cmpeq_epu8_mask() {
15189        let a = _mm_set1_epi8(1);
15190        let b = _mm_set1_epi8(1);
15191        let mask = 0b01010101_01010101;
15192        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
15193        assert_eq!(r, 0b01010101_01010101);
15194    }
15195
15196    #[simd_test(enable = "avx512bw")]
15197    unsafe fn test_mm512_cmpeq_epi16_mask() {
15198        let a = _mm512_set1_epi16(-1);
15199        let b = _mm512_set1_epi16(-1);
15200        let m = _mm512_cmpeq_epi16_mask(a, b);
15201        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15202    }
15203
15204    #[simd_test(enable = "avx512bw")]
15205    unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
15206        let a = _mm512_set1_epi16(-1);
15207        let b = _mm512_set1_epi16(-1);
15208        let mask = 0b01010101_01010101_01010101_01010101;
15209        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
15210        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15211    }
15212
15213    #[simd_test(enable = "avx512bw,avx512vl")]
15214    unsafe fn test_mm256_cmpeq_epi16_mask() {
15215        let a = _mm256_set1_epi16(-1);
15216        let b = _mm256_set1_epi16(-1);
15217        let m = _mm256_cmpeq_epi16_mask(a, b);
15218        assert_eq!(m, 0b11111111_11111111);
15219    }
15220
15221    #[simd_test(enable = "avx512bw,avx512vl")]
15222    unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
15223        let a = _mm256_set1_epi16(-1);
15224        let b = _mm256_set1_epi16(-1);
15225        let mask = 0b01010101_01010101;
15226        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
15227        assert_eq!(r, 0b01010101_01010101);
15228    }
15229
15230    #[simd_test(enable = "avx512bw,avx512vl")]
15231    unsafe fn test_mm_cmpeq_epi16_mask() {
15232        let a = _mm_set1_epi16(-1);
15233        let b = _mm_set1_epi16(-1);
15234        let m = _mm_cmpeq_epi16_mask(a, b);
15235        assert_eq!(m, 0b11111111);
15236    }
15237
15238    #[simd_test(enable = "avx512bw,avx512vl")]
15239    unsafe fn test_mm_mask_cmpeq_epi16_mask() {
15240        let a = _mm_set1_epi16(-1);
15241        let b = _mm_set1_epi16(-1);
15242        let mask = 0b01010101;
15243        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
15244        assert_eq!(r, 0b01010101);
15245    }
15246
15247    #[simd_test(enable = "avx512bw")]
15248    unsafe fn test_mm512_cmpeq_epi8_mask() {
15249        let a = _mm512_set1_epi8(-1);
15250        let b = _mm512_set1_epi8(-1);
15251        let m = _mm512_cmpeq_epi8_mask(a, b);
15252        assert_eq!(
15253            m,
15254            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15255        );
15256    }
15257
15258    #[simd_test(enable = "avx512bw")]
15259    unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
15260        let a = _mm512_set1_epi8(-1);
15261        let b = _mm512_set1_epi8(-1);
15262        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15263        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
15264        assert_eq!(
15265            r,
15266            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15267        );
15268    }
15269
15270    #[simd_test(enable = "avx512bw,avx512vl")]
15271    unsafe fn test_mm256_cmpeq_epi8_mask() {
15272        let a = _mm256_set1_epi8(-1);
15273        let b = _mm256_set1_epi8(-1);
15274        let m = _mm256_cmpeq_epi8_mask(a, b);
15275        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15276    }
15277
15278    #[simd_test(enable = "avx512bw,avx512vl")]
15279    unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
15280        let a = _mm256_set1_epi8(-1);
15281        let b = _mm256_set1_epi8(-1);
15282        let mask = 0b01010101_01010101_01010101_01010101;
15283        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
15284        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15285    }
15286
15287    #[simd_test(enable = "avx512bw,avx512vl")]
15288    unsafe fn test_mm_cmpeq_epi8_mask() {
15289        let a = _mm_set1_epi8(-1);
15290        let b = _mm_set1_epi8(-1);
15291        let m = _mm_cmpeq_epi8_mask(a, b);
15292        assert_eq!(m, 0b11111111_11111111);
15293    }
15294
15295    #[simd_test(enable = "avx512bw,avx512vl")]
15296    unsafe fn test_mm_mask_cmpeq_epi8_mask() {
15297        let a = _mm_set1_epi8(-1);
15298        let b = _mm_set1_epi8(-1);
15299        let mask = 0b01010101_01010101;
15300        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
15301        assert_eq!(r, 0b01010101_01010101);
15302    }
15303
15304    #[simd_test(enable = "avx512bw")]
15305    unsafe fn test_mm512_cmpneq_epu16_mask() {
15306        let a = _mm512_set1_epi16(2);
15307        let b = _mm512_set1_epi16(1);
15308        let m = _mm512_cmpneq_epu16_mask(a, b);
15309        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15310    }
15311
15312    #[simd_test(enable = "avx512bw")]
15313    unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
15314        let a = _mm512_set1_epi16(2);
15315        let b = _mm512_set1_epi16(1);
15316        let mask = 0b01010101_01010101_01010101_01010101;
15317        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
15318        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15319    }
15320
15321    #[simd_test(enable = "avx512bw,avx512vl")]
15322    unsafe fn test_mm256_cmpneq_epu16_mask() {
15323        let a = _mm256_set1_epi16(2);
15324        let b = _mm256_set1_epi16(1);
15325        let m = _mm256_cmpneq_epu16_mask(a, b);
15326        assert_eq!(m, 0b11111111_11111111);
15327    }
15328
15329    #[simd_test(enable = "avx512bw,avx512vl")]
15330    unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
15331        let a = _mm256_set1_epi16(2);
15332        let b = _mm256_set1_epi16(1);
15333        let mask = 0b01010101_01010101;
15334        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
15335        assert_eq!(r, 0b01010101_01010101);
15336    }
15337
15338    #[simd_test(enable = "avx512bw,avx512vl")]
15339    unsafe fn test_mm_cmpneq_epu16_mask() {
15340        let a = _mm_set1_epi16(2);
15341        let b = _mm_set1_epi16(1);
15342        let m = _mm_cmpneq_epu16_mask(a, b);
15343        assert_eq!(m, 0b11111111);
15344    }
15345
15346    #[simd_test(enable = "avx512bw,avx512vl")]
15347    unsafe fn test_mm_mask_cmpneq_epu16_mask() {
15348        let a = _mm_set1_epi16(2);
15349        let b = _mm_set1_epi16(1);
15350        let mask = 0b01010101;
15351        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
15352        assert_eq!(r, 0b01010101);
15353    }
15354
15355    #[simd_test(enable = "avx512bw")]
15356    unsafe fn test_mm512_cmpneq_epu8_mask() {
15357        let a = _mm512_set1_epi8(2);
15358        let b = _mm512_set1_epi8(1);
15359        let m = _mm512_cmpneq_epu8_mask(a, b);
15360        assert_eq!(
15361            m,
15362            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15363        );
15364    }
15365
15366    #[simd_test(enable = "avx512bw")]
15367    unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
15368        let a = _mm512_set1_epi8(2);
15369        let b = _mm512_set1_epi8(1);
15370        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15371        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
15372        assert_eq!(
15373            r,
15374            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15375        );
15376    }
15377
15378    #[simd_test(enable = "avx512bw,avx512vl")]
15379    unsafe fn test_mm256_cmpneq_epu8_mask() {
15380        let a = _mm256_set1_epi8(2);
15381        let b = _mm256_set1_epi8(1);
15382        let m = _mm256_cmpneq_epu8_mask(a, b);
15383        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15384    }
15385
15386    #[simd_test(enable = "avx512bw,avx512vl")]
15387    unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
15388        let a = _mm256_set1_epi8(2);
15389        let b = _mm256_set1_epi8(1);
15390        let mask = 0b01010101_01010101_01010101_01010101;
15391        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
15392        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15393    }
15394
15395    #[simd_test(enable = "avx512bw,avx512vl")]
15396    unsafe fn test_mm_cmpneq_epu8_mask() {
15397        let a = _mm_set1_epi8(2);
15398        let b = _mm_set1_epi8(1);
15399        let m = _mm_cmpneq_epu8_mask(a, b);
15400        assert_eq!(m, 0b11111111_11111111);
15401    }
15402
15403    #[simd_test(enable = "avx512bw,avx512vl")]
15404    unsafe fn test_mm_mask_cmpneq_epu8_mask() {
15405        let a = _mm_set1_epi8(2);
15406        let b = _mm_set1_epi8(1);
15407        let mask = 0b01010101_01010101;
15408        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
15409        assert_eq!(r, 0b01010101_01010101);
15410    }
15411
15412    #[simd_test(enable = "avx512bw")]
15413    unsafe fn test_mm512_cmpneq_epi16_mask() {
15414        let a = _mm512_set1_epi16(1);
15415        let b = _mm512_set1_epi16(-1);
15416        let m = _mm512_cmpneq_epi16_mask(a, b);
15417        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15418    }
15419
15420    #[simd_test(enable = "avx512bw")]
15421    unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
15422        let a = _mm512_set1_epi16(1);
15423        let b = _mm512_set1_epi16(-1);
15424        let mask = 0b01010101_01010101_01010101_01010101;
15425        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
15426        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15427    }
15428
15429    #[simd_test(enable = "avx512bw,avx512vl")]
15430    unsafe fn test_mm256_cmpneq_epi16_mask() {
15431        let a = _mm256_set1_epi16(1);
15432        let b = _mm256_set1_epi16(-1);
15433        let m = _mm256_cmpneq_epi16_mask(a, b);
15434        assert_eq!(m, 0b11111111_11111111);
15435    }
15436
15437    #[simd_test(enable = "avx512bw,avx512vl")]
15438    unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
15439        let a = _mm256_set1_epi16(1);
15440        let b = _mm256_set1_epi16(-1);
15441        let mask = 0b01010101_01010101;
15442        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
15443        assert_eq!(r, 0b01010101_01010101);
15444    }
15445
15446    #[simd_test(enable = "avx512bw,avx512vl")]
15447    unsafe fn test_mm_cmpneq_epi16_mask() {
15448        let a = _mm_set1_epi16(1);
15449        let b = _mm_set1_epi16(-1);
15450        let m = _mm_cmpneq_epi16_mask(a, b);
15451        assert_eq!(m, 0b11111111);
15452    }
15453
15454    #[simd_test(enable = "avx512bw,avx512vl")]
15455    unsafe fn test_mm_mask_cmpneq_epi16_mask() {
15456        let a = _mm_set1_epi16(1);
15457        let b = _mm_set1_epi16(-1);
15458        let mask = 0b01010101;
15459        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
15460        assert_eq!(r, 0b01010101);
15461    }
15462
15463    #[simd_test(enable = "avx512bw")]
15464    unsafe fn test_mm512_cmpneq_epi8_mask() {
15465        let a = _mm512_set1_epi8(1);
15466        let b = _mm512_set1_epi8(-1);
15467        let m = _mm512_cmpneq_epi8_mask(a, b);
15468        assert_eq!(
15469            m,
15470            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15471        );
15472    }
15473
15474    #[simd_test(enable = "avx512bw")]
15475    unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
15476        let a = _mm512_set1_epi8(1);
15477        let b = _mm512_set1_epi8(-1);
15478        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15479        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
15480        assert_eq!(
15481            r,
15482            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15483        );
15484    }
15485
15486    #[simd_test(enable = "avx512bw,avx512vl")]
15487    unsafe fn test_mm256_cmpneq_epi8_mask() {
15488        let a = _mm256_set1_epi8(1);
15489        let b = _mm256_set1_epi8(-1);
15490        let m = _mm256_cmpneq_epi8_mask(a, b);
15491        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15492    }
15493
15494    #[simd_test(enable = "avx512bw,avx512vl")]
15495    unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
15496        let a = _mm256_set1_epi8(1);
15497        let b = _mm256_set1_epi8(-1);
15498        let mask = 0b01010101_01010101_01010101_01010101;
15499        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
15500        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15501    }
15502
15503    #[simd_test(enable = "avx512bw,avx512vl")]
15504    unsafe fn test_mm_cmpneq_epi8_mask() {
15505        let a = _mm_set1_epi8(1);
15506        let b = _mm_set1_epi8(-1);
15507        let m = _mm_cmpneq_epi8_mask(a, b);
15508        assert_eq!(m, 0b11111111_11111111);
15509    }
15510
15511    #[simd_test(enable = "avx512bw,avx512vl")]
15512    unsafe fn test_mm_mask_cmpneq_epi8_mask() {
15513        let a = _mm_set1_epi8(1);
15514        let b = _mm_set1_epi8(-1);
15515        let mask = 0b01010101_01010101;
15516        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
15517        assert_eq!(r, 0b01010101_01010101);
15518    }
15519
15520    #[simd_test(enable = "avx512bw")]
15521    unsafe fn test_mm512_cmp_epu16_mask() {
15522        let a = _mm512_set1_epi16(0);
15523        let b = _mm512_set1_epi16(1);
15524        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
15525        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15526    }
15527
15528    #[simd_test(enable = "avx512bw")]
15529    unsafe fn test_mm512_mask_cmp_epu16_mask() {
15530        let a = _mm512_set1_epi16(0);
15531        let b = _mm512_set1_epi16(1);
15532        let mask = 0b01010101_01010101_01010101_01010101;
15533        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
15534        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15535    }
15536
15537    #[simd_test(enable = "avx512bw,avx512vl")]
15538    unsafe fn test_mm256_cmp_epu16_mask() {
15539        let a = _mm256_set1_epi16(0);
15540        let b = _mm256_set1_epi16(1);
15541        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
15542        assert_eq!(m, 0b11111111_11111111);
15543    }
15544
15545    #[simd_test(enable = "avx512bw,avx512vl")]
15546    unsafe fn test_mm256_mask_cmp_epu16_mask() {
15547        let a = _mm256_set1_epi16(0);
15548        let b = _mm256_set1_epi16(1);
15549        let mask = 0b01010101_01010101;
15550        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
15551        assert_eq!(r, 0b01010101_01010101);
15552    }
15553
15554    #[simd_test(enable = "avx512bw,avx512vl")]
15555    unsafe fn test_mm_cmp_epu16_mask() {
15556        let a = _mm_set1_epi16(0);
15557        let b = _mm_set1_epi16(1);
15558        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
15559        assert_eq!(m, 0b11111111);
15560    }
15561
15562    #[simd_test(enable = "avx512bw,avx512vl")]
15563    unsafe fn test_mm_mask_cmp_epu16_mask() {
15564        let a = _mm_set1_epi16(0);
15565        let b = _mm_set1_epi16(1);
15566        let mask = 0b01010101;
15567        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
15568        assert_eq!(r, 0b01010101);
15569    }
15570
15571    #[simd_test(enable = "avx512bw")]
15572    unsafe fn test_mm512_cmp_epu8_mask() {
15573        let a = _mm512_set1_epi8(0);
15574        let b = _mm512_set1_epi8(1);
15575        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
15576        assert_eq!(
15577            m,
15578            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15579        );
15580    }
15581
15582    #[simd_test(enable = "avx512bw")]
15583    unsafe fn test_mm512_mask_cmp_epu8_mask() {
15584        let a = _mm512_set1_epi8(0);
15585        let b = _mm512_set1_epi8(1);
15586        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15587        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
15588        assert_eq!(
15589            r,
15590            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15591        );
15592    }
15593
15594    #[simd_test(enable = "avx512bw,avx512vl")]
15595    unsafe fn test_mm256_cmp_epu8_mask() {
15596        let a = _mm256_set1_epi8(0);
15597        let b = _mm256_set1_epi8(1);
15598        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
15599        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15600    }
15601
15602    #[simd_test(enable = "avx512bw,avx512vl")]
15603    unsafe fn test_mm256_mask_cmp_epu8_mask() {
15604        let a = _mm256_set1_epi8(0);
15605        let b = _mm256_set1_epi8(1);
15606        let mask = 0b01010101_01010101_01010101_01010101;
15607        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
15608        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15609    }
15610
15611    #[simd_test(enable = "avx512bw,avx512vl")]
15612    unsafe fn test_mm_cmp_epu8_mask() {
15613        let a = _mm_set1_epi8(0);
15614        let b = _mm_set1_epi8(1);
15615        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
15616        assert_eq!(m, 0b11111111_11111111);
15617    }
15618
15619    #[simd_test(enable = "avx512bw,avx512vl")]
15620    unsafe fn test_mm_mask_cmp_epu8_mask() {
15621        let a = _mm_set1_epi8(0);
15622        let b = _mm_set1_epi8(1);
15623        let mask = 0b01010101_01010101;
15624        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
15625        assert_eq!(r, 0b01010101_01010101);
15626    }
15627
15628    #[simd_test(enable = "avx512bw")]
15629    unsafe fn test_mm512_cmp_epi16_mask() {
15630        let a = _mm512_set1_epi16(0);
15631        let b = _mm512_set1_epi16(1);
15632        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
15633        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15634    }
15635
15636    #[simd_test(enable = "avx512bw")]
15637    unsafe fn test_mm512_mask_cmp_epi16_mask() {
15638        let a = _mm512_set1_epi16(0);
15639        let b = _mm512_set1_epi16(1);
15640        let mask = 0b01010101_01010101_01010101_01010101;
15641        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
15642        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15643    }
15644
15645    #[simd_test(enable = "avx512bw,avx512vl")]
15646    unsafe fn test_mm256_cmp_epi16_mask() {
15647        let a = _mm256_set1_epi16(0);
15648        let b = _mm256_set1_epi16(1);
15649        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
15650        assert_eq!(m, 0b11111111_11111111);
15651    }
15652
15653    #[simd_test(enable = "avx512bw,avx512vl")]
15654    unsafe fn test_mm256_mask_cmp_epi16_mask() {
15655        let a = _mm256_set1_epi16(0);
15656        let b = _mm256_set1_epi16(1);
15657        let mask = 0b01010101_01010101;
15658        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
15659        assert_eq!(r, 0b01010101_01010101);
15660    }
15661
15662    #[simd_test(enable = "avx512bw,avx512vl")]
15663    unsafe fn test_mm_cmp_epi16_mask() {
15664        let a = _mm_set1_epi16(0);
15665        let b = _mm_set1_epi16(1);
15666        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
15667        assert_eq!(m, 0b11111111);
15668    }
15669
15670    #[simd_test(enable = "avx512bw,avx512vl")]
15671    unsafe fn test_mm_mask_cmp_epi16_mask() {
15672        let a = _mm_set1_epi16(0);
15673        let b = _mm_set1_epi16(1);
15674        let mask = 0b01010101;
15675        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
15676        assert_eq!(r, 0b01010101);
15677    }
15678
15679    #[simd_test(enable = "avx512bw")]
15680    unsafe fn test_mm512_cmp_epi8_mask() {
15681        let a = _mm512_set1_epi8(0);
15682        let b = _mm512_set1_epi8(1);
15683        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
15684        assert_eq!(
15685            m,
15686            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15687        );
15688    }
15689
15690    #[simd_test(enable = "avx512bw")]
15691    unsafe fn test_mm512_mask_cmp_epi8_mask() {
15692        let a = _mm512_set1_epi8(0);
15693        let b = _mm512_set1_epi8(1);
15694        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15695        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
15696        assert_eq!(
15697            r,
15698            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15699        );
15700    }
15701
15702    #[simd_test(enable = "avx512bw,avx512vl")]
15703    unsafe fn test_mm256_cmp_epi8_mask() {
15704        let a = _mm256_set1_epi8(0);
15705        let b = _mm256_set1_epi8(1);
15706        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
15707        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15708    }
15709
15710    #[simd_test(enable = "avx512bw,avx512vl")]
15711    unsafe fn test_mm256_mask_cmp_epi8_mask() {
15712        let a = _mm256_set1_epi8(0);
15713        let b = _mm256_set1_epi8(1);
15714        let mask = 0b01010101_01010101_01010101_01010101;
15715        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
15716        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15717    }
15718
15719    #[simd_test(enable = "avx512bw,avx512vl")]
15720    unsafe fn test_mm_cmp_epi8_mask() {
15721        let a = _mm_set1_epi8(0);
15722        let b = _mm_set1_epi8(1);
15723        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
15724        assert_eq!(m, 0b11111111_11111111);
15725    }
15726
15727    #[simd_test(enable = "avx512bw,avx512vl")]
15728    unsafe fn test_mm_mask_cmp_epi8_mask() {
15729        let a = _mm_set1_epi8(0);
15730        let b = _mm_set1_epi8(1);
15731        let mask = 0b01010101_01010101;
15732        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
15733        assert_eq!(r, 0b01010101_01010101);
15734    }
15735
15736    #[simd_test(enable = "avx512bw,avx512vl")]
15737    unsafe fn test_mm256_reduce_add_epi16() {
15738        let a = _mm256_set1_epi16(1);
15739        let e = _mm256_reduce_add_epi16(a);
15740        assert_eq!(16, e);
15741    }
15742
15743    #[simd_test(enable = "avx512bw,avx512vl")]
15744    unsafe fn test_mm256_mask_reduce_add_epi16() {
15745        let a = _mm256_set1_epi16(1);
15746        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
15747        assert_eq!(8, e);
15748    }
15749
15750    #[simd_test(enable = "avx512bw,avx512vl")]
15751    unsafe fn test_mm_reduce_add_epi16() {
15752        let a = _mm_set1_epi16(1);
15753        let e = _mm_reduce_add_epi16(a);
15754        assert_eq!(8, e);
15755    }
15756
15757    #[simd_test(enable = "avx512bw,avx512vl")]
15758    unsafe fn test_mm_mask_reduce_add_epi16() {
15759        let a = _mm_set1_epi16(1);
15760        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
15761        assert_eq!(4, e);
15762    }
15763
15764    #[simd_test(enable = "avx512bw,avx512vl")]
15765    unsafe fn test_mm256_reduce_add_epi8() {
15766        let a = _mm256_set1_epi8(1);
15767        let e = _mm256_reduce_add_epi8(a);
15768        assert_eq!(32, e);
15769    }
15770
15771    #[simd_test(enable = "avx512bw,avx512vl")]
15772    unsafe fn test_mm256_mask_reduce_add_epi8() {
15773        let a = _mm256_set1_epi8(1);
15774        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
15775        assert_eq!(16, e);
15776    }
15777
15778    #[simd_test(enable = "avx512bw,avx512vl")]
15779    unsafe fn test_mm_reduce_add_epi8() {
15780        let a = _mm_set1_epi8(1);
15781        let e = _mm_reduce_add_epi8(a);
15782        assert_eq!(16, e);
15783    }
15784
15785    #[simd_test(enable = "avx512bw,avx512vl")]
15786    unsafe fn test_mm_mask_reduce_add_epi8() {
15787        let a = _mm_set1_epi8(1);
15788        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
15789        assert_eq!(8, e);
15790    }
15791
15792    #[simd_test(enable = "avx512bw,avx512vl")]
15793    unsafe fn test_mm256_reduce_and_epi16() {
15794        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15795        let e = _mm256_reduce_and_epi16(a);
15796        assert_eq!(0, e);
15797    }
15798
15799    #[simd_test(enable = "avx512bw,avx512vl")]
15800    unsafe fn test_mm256_mask_reduce_and_epi16() {
15801        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15802        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
15803        assert_eq!(1, e);
15804    }
15805
15806    #[simd_test(enable = "avx512bw,avx512vl")]
15807    unsafe fn test_mm_reduce_and_epi16() {
15808        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
15809        let e = _mm_reduce_and_epi16(a);
15810        assert_eq!(0, e);
15811    }
15812
15813    #[simd_test(enable = "avx512bw,avx512vl")]
15814    unsafe fn test_mm_mask_reduce_and_epi16() {
15815        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
15816        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
15817        assert_eq!(1, e);
15818    }
15819
15820    #[simd_test(enable = "avx512bw,avx512vl")]
15821    unsafe fn test_mm256_reduce_and_epi8() {
15822        let a = _mm256_set_epi8(
15823            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
15824            2, 2, 2,
15825        );
15826        let e = _mm256_reduce_and_epi8(a);
15827        assert_eq!(0, e);
15828    }
15829
15830    #[simd_test(enable = "avx512bw,avx512vl")]
15831    unsafe fn test_mm256_mask_reduce_and_epi8() {
15832        let a = _mm256_set_epi8(
15833            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
15834            2, 2, 2,
15835        );
15836        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
15837        assert_eq!(1, e);
15838    }
15839
15840    #[simd_test(enable = "avx512bw,avx512vl")]
15841    unsafe fn test_mm_reduce_and_epi8() {
15842        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15843        let e = _mm_reduce_and_epi8(a);
15844        assert_eq!(0, e);
15845    }
15846
15847    #[simd_test(enable = "avx512bw,avx512vl")]
15848    unsafe fn test_mm_mask_reduce_and_epi8() {
15849        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15850        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
15851        assert_eq!(1, e);
15852    }
15853
15854    #[simd_test(enable = "avx512bw,avx512vl")]
15855    unsafe fn test_mm256_reduce_mul_epi16() {
15856        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
15857        let e = _mm256_reduce_mul_epi16(a);
15858        assert_eq!(256, e);
15859    }
15860
15861    #[simd_test(enable = "avx512bw,avx512vl")]
15862    unsafe fn test_mm256_mask_reduce_mul_epi16() {
15863        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
15864        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
15865        assert_eq!(1, e);
15866    }
15867
15868    #[simd_test(enable = "avx512bw,avx512vl")]
15869    unsafe fn test_mm_reduce_mul_epi16() {
15870        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
15871        let e = _mm_reduce_mul_epi16(a);
15872        assert_eq!(16, e);
15873    }
15874
15875    #[simd_test(enable = "avx512bw,avx512vl")]
15876    unsafe fn test_mm_mask_reduce_mul_epi16() {
15877        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
15878        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
15879        assert_eq!(1, e);
15880    }
15881
15882    #[simd_test(enable = "avx512bw,avx512vl")]
15883    unsafe fn test_mm256_reduce_mul_epi8() {
15884        let a = _mm256_set_epi8(
15885            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15886            2, 2, 2,
15887        );
15888        let e = _mm256_reduce_mul_epi8(a);
15889        assert_eq!(64, e);
15890    }
15891
15892    #[simd_test(enable = "avx512bw,avx512vl")]
15893    unsafe fn test_mm256_mask_reduce_mul_epi8() {
15894        let a = _mm256_set_epi8(
15895            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15896            2, 2, 2,
15897        );
15898        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
15899        assert_eq!(1, e);
15900    }
15901
15902    #[simd_test(enable = "avx512bw,avx512vl")]
15903    unsafe fn test_mm_reduce_mul_epi8() {
15904        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
15905        let e = _mm_reduce_mul_epi8(a);
15906        assert_eq!(8, e);
15907    }
15908
15909    #[simd_test(enable = "avx512bw,avx512vl")]
15910    unsafe fn test_mm_mask_reduce_mul_epi8() {
15911        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
15912        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
15913        assert_eq!(1, e);
15914    }
15915
15916    #[simd_test(enable = "avx512bw,avx512vl")]
15917    unsafe fn test_mm256_reduce_max_epi16() {
15918        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15919        let e: i16 = _mm256_reduce_max_epi16(a);
15920        assert_eq!(15, e);
15921    }
15922
15923    #[simd_test(enable = "avx512bw,avx512vl")]
15924    unsafe fn test_mm256_mask_reduce_max_epi16() {
15925        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15926        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
15927        assert_eq!(7, e);
15928    }
15929
15930    #[simd_test(enable = "avx512bw,avx512vl")]
15931    unsafe fn test_mm_reduce_max_epi16() {
15932        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
15933        let e: i16 = _mm_reduce_max_epi16(a);
15934        assert_eq!(7, e);
15935    }
15936
15937    #[simd_test(enable = "avx512bw,avx512vl")]
15938    unsafe fn test_mm_mask_reduce_max_epi16() {
15939        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
15940        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
15941        assert_eq!(3, e);
15942    }
15943
15944    #[simd_test(enable = "avx512bw,avx512vl")]
15945    unsafe fn test_mm256_reduce_max_epi8() {
15946        let a = _mm256_set_epi8(
15947            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
15948            24, 25, 26, 27, 28, 29, 30, 31,
15949        );
15950        let e: i8 = _mm256_reduce_max_epi8(a);
15951        assert_eq!(31, e);
15952    }
15953
15954    #[simd_test(enable = "avx512bw,avx512vl")]
15955    unsafe fn test_mm256_mask_reduce_max_epi8() {
15956        let a = _mm256_set_epi8(
15957            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
15958            24, 25, 26, 27, 28, 29, 30, 31,
15959        );
15960        let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
15961        assert_eq!(15, e);
15962    }
15963
15964    #[simd_test(enable = "avx512bw,avx512vl")]
15965    unsafe fn test_mm_reduce_max_epi8() {
15966        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15967        let e: i8 = _mm_reduce_max_epi8(a);
15968        assert_eq!(15, e);
15969    }
15970
15971    #[simd_test(enable = "avx512bw,avx512vl")]
15972    unsafe fn test_mm_mask_reduce_max_epi8() {
15973        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15974        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
15975        assert_eq!(7, e);
15976    }
15977
15978    #[simd_test(enable = "avx512bw,avx512vl")]
15979    unsafe fn test_mm256_reduce_max_epu16() {
15980        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15981        let e: u16 = _mm256_reduce_max_epu16(a);
15982        assert_eq!(15, e);
15983    }
15984
15985    #[simd_test(enable = "avx512bw,avx512vl")]
15986    unsafe fn test_mm256_mask_reduce_max_epu16() {
15987        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15988        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
15989        assert_eq!(7, e);
15990    }
15991
15992    #[simd_test(enable = "avx512bw,avx512vl")]
15993    unsafe fn test_mm_reduce_max_epu16() {
15994        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
15995        let e: u16 = _mm_reduce_max_epu16(a);
15996        assert_eq!(7, e);
15997    }
15998
15999    #[simd_test(enable = "avx512bw,avx512vl")]
16000    unsafe fn test_mm_mask_reduce_max_epu16() {
16001        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16002        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
16003        assert_eq!(3, e);
16004    }
16005
16006    #[simd_test(enable = "avx512bw,avx512vl")]
16007    unsafe fn test_mm256_reduce_max_epu8() {
16008        let a = _mm256_set_epi8(
16009            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16010            24, 25, 26, 27, 28, 29, 30, 31,
16011        );
16012        let e: u8 = _mm256_reduce_max_epu8(a);
16013        assert_eq!(31, e);
16014    }
16015
16016    #[simd_test(enable = "avx512bw,avx512vl")]
16017    unsafe fn test_mm256_mask_reduce_max_epu8() {
16018        let a = _mm256_set_epi8(
16019            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16020            24, 25, 26, 27, 28, 29, 30, 31,
16021        );
16022        let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
16023        assert_eq!(15, e);
16024    }
16025
16026    #[simd_test(enable = "avx512bw,avx512vl")]
16027    unsafe fn test_mm_reduce_max_epu8() {
16028        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16029        let e: u8 = _mm_reduce_max_epu8(a);
16030        assert_eq!(15, e);
16031    }
16032
16033    #[simd_test(enable = "avx512bw,avx512vl")]
16034    unsafe fn test_mm_mask_reduce_max_epu8() {
16035        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16036        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
16037        assert_eq!(7, e);
16038    }
16039
16040    #[simd_test(enable = "avx512bw,avx512vl")]
16041    unsafe fn test_mm256_reduce_min_epi16() {
16042        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16043        let e: i16 = _mm256_reduce_min_epi16(a);
16044        assert_eq!(0, e);
16045    }
16046
16047    #[simd_test(enable = "avx512bw,avx512vl")]
16048    unsafe fn test_mm256_mask_reduce_min_epi16() {
16049        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16050        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
16051        assert_eq!(0, e);
16052    }
16053
16054    #[simd_test(enable = "avx512bw,avx512vl")]
16055    unsafe fn test_mm_reduce_min_epi16() {
16056        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16057        let e: i16 = _mm_reduce_min_epi16(a);
16058        assert_eq!(0, e);
16059    }
16060
16061    #[simd_test(enable = "avx512bw,avx512vl")]
16062    unsafe fn test_mm_mask_reduce_min_epi16() {
16063        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16064        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
16065        assert_eq!(0, e);
16066    }
16067
16068    #[simd_test(enable = "avx512bw,avx512vl")]
16069    unsafe fn test_mm256_reduce_min_epi8() {
16070        let a = _mm256_set_epi8(
16071            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16072            24, 25, 26, 27, 28, 29, 30, 31,
16073        );
16074        let e: i8 = _mm256_reduce_min_epi8(a);
16075        assert_eq!(0, e);
16076    }
16077
16078    #[simd_test(enable = "avx512bw,avx512vl")]
16079    unsafe fn test_mm256_mask_reduce_min_epi8() {
16080        let a = _mm256_set_epi8(
16081            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16082            24, 25, 26, 27, 28, 29, 30, 31,
16083        );
16084        let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a);
16085        assert_eq!(0, e);
16086    }
16087
16088    #[simd_test(enable = "avx512bw,avx512vl")]
16089    unsafe fn test_mm_reduce_min_epi8() {
16090        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16091        let e: i8 = _mm_reduce_min_epi8(a);
16092        assert_eq!(0, e);
16093    }
16094
16095    #[simd_test(enable = "avx512bw,avx512vl")]
16096    unsafe fn test_mm_mask_reduce_min_epi8() {
16097        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16098        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
16099        assert_eq!(0, e);
16100    }
16101
16102    #[simd_test(enable = "avx512bw,avx512vl")]
16103    unsafe fn test_mm256_reduce_min_epu16() {
16104        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16105        let e: u16 = _mm256_reduce_min_epu16(a);
16106        assert_eq!(0, e);
16107    }
16108
16109    #[simd_test(enable = "avx512bw,avx512vl")]
16110    unsafe fn test_mm256_mask_reduce_min_epu16() {
16111        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16112        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
16113        assert_eq!(0, e);
16114    }
16115
16116    #[simd_test(enable = "avx512bw,avx512vl")]
16117    unsafe fn test_mm_reduce_min_epu16() {
16118        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16119        let e: u16 = _mm_reduce_min_epu16(a);
16120        assert_eq!(0, e);
16121    }
16122
16123    #[simd_test(enable = "avx512bw,avx512vl")]
16124    unsafe fn test_mm_mask_reduce_min_epu16() {
16125        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16126        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
16127        assert_eq!(0, e);
16128    }
16129
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu8() {
        // Unsigned-min reduction over 32 u8 lanes holding 0..=31; min is 0.
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu8() {
        // The high 16 mask bits select elements 16..=31, which hold the values
        // 0..=15 (set_epi8 lists the highest element first), so the min is 0.
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu8() {
        // Unsigned-min reduction over 16 u8 lanes holding 0..=15; min is 0.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu8() {
        // Mask bits 8..=15 select elements 8..=15 (values 0..=7); min is 0.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }
16163
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi16() {
        // OR-reduction over lanes of 1s and 2s: 1 | 2 == 3.
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi16() {
        // The high eight mask bits select elements 8..=15, which all hold 1
        // (set_epi16 lists the highest element first), so the OR is 1.
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi16() {
        // OR-reduction over lanes of 1s and 2s: 1 | 2 == 3.
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi16() {
        // Mask selects elements 4..=7, which all hold 1, so the OR is 1.
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
        assert_eq!(1, e);
    }
16191
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi8() {
        // OR-reduction over lanes of 1s and 2s: 1 | 2 == 3.
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi8() {
        // Every set mask bit lines up with a lane holding 1 (the 2s sit in the
        // unselected lanes), so the masked OR is 1.
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi8() {
        // OR-reduction over lanes of 1s and 2s: 1 | 2 == 3.
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi8() {
        // The high eight mask bits select elements 8..=15, which all hold 1.
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }
16225
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi16() {
        // Unaligned load fills lanes in memory order; set_epi16 lists the
        // highest lane first, so `e` is `a` written in reverse.
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi16(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi16() {
        // Same memory-order check with 16 lanes of a 256-bit load.
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm256_loadu_epi16(&a[0]);
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi16() {
        // Same memory-order check with 8 lanes of a 128-bit load.
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = _mm_loadu_epi16(&a[0]);
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi8() {
        // 64-lane byte load; expected vector is the array reversed, again
        // because set_epi8 lists the highest lane first.
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi8() {
        // 32-lane byte load, memory order vs. set_epi8's reversed order.
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm256_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi8() {
        // 16-lane byte load, memory order vs. set_epi8's reversed order.
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm_loadu_epi8(&a[0]);
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }
16281
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi16() {
        // Round-trip: store `a` into `r` through an i16 pointer and compare.
        // The store overwrites all 512 bits, so r's undefined initial
        // contents never escape.
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi16() {
        // Round-trip store of 16 i16 lanes into a zeroed 256-bit buffer.
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi16() {
        // Round-trip store of 8 i16 lanes into a zeroed 128-bit buffer.
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi8() {
        // Round-trip store of 64 i8 lanes; all bits are overwritten.
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi8() {
        // Round-trip store of 32 i8 lanes into a zeroed 256-bit buffer.
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi8() {
        // Round-trip store of 16 i8 lanes into a zeroed 128-bit buffer.
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m128i(r, a);
    }
16329
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi16() {
        // Masked load: element i comes from memory when mask bit i is set,
        // otherwise from src (42). `e` spells out that per-lane selection.
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi16() {
        // Zero-masked load: unselected lanes are zeroed instead of taking src.
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi16(m, black_box(p));
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi16() {
        // Masked store: only lanes whose mask bit is set are written to `r`;
        // the remaining slots keep their initial 42.
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm512_loadu_epi16(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
    }
16382
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi8() {
        // 64-lane masked byte load: bit i set -> load a[i], clear -> src (42).
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi8() {
        // 64-lane zero-masked byte load: unselected lanes become 0.
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi8() {
        // 64-lane masked byte store: only selected bytes of `r` are written.
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = _mm512_loadu_epi8(a.as_ptr());
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
    }
16441
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi16() {
        // 16-lane masked load: bit i set -> load a[i], clear -> src (42).
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi16() {
        // 16-lane zero-masked load: unselected lanes become 0.
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi16() {
        // 16-lane masked store: unselected slots of `r` keep their 42.
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm256_loadu_epi16(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
    }
16480
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi8() {
        // 32-lane masked byte load: bit i set -> load a[i], clear -> src (42).
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi8() {
        // 32-lane zero-masked byte load: unselected lanes become 0.
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi8() {
        // 32-lane masked byte store: unselected slots of `r` keep their 42.
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm256_loadu_epi8(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
    }
16533
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi16() {
        // 8-lane masked load: bit i set -> load a[i], clear -> src (42).
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_mask_loadu_epi16(src, m, black_box(p));
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi16() {
        // 8-lane zero-masked load: unselected lanes become 0.
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi16() {
        // 8-lane masked store: unselected slots of `r` keep their 42.
        let mut r = [42_i16; 8];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let a = _mm_loadu_epi16(a.as_ptr());
        let m = 0b11001010;
        _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
    }
16568
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi8() {
        // 16-lane masked byte load: bit i set -> load a[i], clear -> src (42).
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi8() {
        // 16-lane zero-masked byte load: unselected lanes become 0.
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_loadu_epi8(m, black_box(p));
        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi8() {
        // 16-lane masked byte store: unselected slots of `r` keep their 42.
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm_loadu_epi8(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
    }
16607
16608    #[simd_test(enable = "avx512bw")]
16609    unsafe fn test_mm512_madd_epi16() {
16610        let a = _mm512_set1_epi16(1);
16611        let b = _mm512_set1_epi16(1);
16612        let r = _mm512_madd_epi16(a, b);
16613        let e = _mm512_set1_epi32(2);
16614        assert_eq_m512i(r, e);
16615    }
16616
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask: result is the src operand (a) unchanged.
        let r = _mm512_mask_madd_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
        // Selected i32 lanes compute 1*1 + 1*1 = 2; unselected lanes keep src,
        // whose all-1s epi16 pattern reads as 0x00010001 == 1 << 16 | 1 per i32.
        let e = _mm512_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask zeroes every lane.
        let r = _mm512_maskz_madd_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Lanes 0..=3 compute 2; the rest are zeroed.
        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }
16655
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        // Zero mask: result is the src operand (a) unchanged.
        let r = _mm256_mask_madd_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
        // Selected i32 lanes compute 1*1 + 1*1 = 2; unselected lanes keep src,
        // whose all-1s epi16 pattern reads as 1 << 16 | 1 per i32.
        let e = _mm256_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_madd_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Lanes 0..=3 compute 2; the rest are zeroed.
        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_madd_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Mask 0b00001111 covers all four i32 lanes, so every lane computes 2.
        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_madd_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Mask covers all four i32 lanes, so every lane computes 2.
        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
16708
16709    #[simd_test(enable = "avx512bw")]
16710    unsafe fn test_mm512_maddubs_epi16() {
16711        let a = _mm512_set1_epi8(1);
16712        let b = _mm512_set1_epi8(1);
16713        let r = _mm512_maddubs_epi16(a, b);
16714        let e = _mm512_set1_epi16(2);
16715        assert_eq_m512i(r, e);
16716    }
16717
16718    #[simd_test(enable = "avx512bw")]
16719    unsafe fn test_mm512_mask_maddubs_epi16() {
16720        let a = _mm512_set1_epi8(1);
16721        let b = _mm512_set1_epi8(1);
16722        let src = _mm512_set1_epi16(1);
16723        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
16724        assert_eq_m512i(r, src);
16725        let r = _mm512_mask_add_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
16726        #[rustfmt::skip]
16727        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16728                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<9|2);
16729        assert_eq_m512i(r, e);
16730    }
16731
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        // Zero mask zeroes every lane.
        let r = _mm512_maskz_maddubs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Selected lanes compute 1*1 + 1*1 = 2; unselected lanes are zeroed.
        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }
16744
16745    #[simd_test(enable = "avx512bw,avx512vl")]
16746    unsafe fn test_mm256_mask_maddubs_epi16() {
16747        let a = _mm256_set1_epi8(1);
16748        let b = _mm256_set1_epi8(1);
16749        let src = _mm256_set1_epi16(1);
16750        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
16751        assert_eq_m256i(r, src);
16752        let r = _mm256_mask_add_epi16(src, 0b00000000_00000001, a, b);
16753        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
16754        assert_eq_m256i(r, e);
16755    }
16756
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_maddubs_epi16() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        // Zero mask zeroes every lane.
        let r = _mm256_maskz_maddubs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Lanes 0..=7 compute 1*1 + 1*1 = 2; lanes 8..=15 are zeroed.
        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
16767
16768    #[simd_test(enable = "avx512bw,avx512vl")]
16769    unsafe fn test_mm_mask_maddubs_epi16() {
16770        let a = _mm_set1_epi8(1);
16771        let b = _mm_set1_epi8(1);
16772        let src = _mm_set1_epi16(1);
16773        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
16774        assert_eq_m128i(r, src);
16775        let r = _mm_mask_add_epi16(src, 0b00000001, a, b);
16776        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
16777        assert_eq_m128i(r, e);
16778    }
16779
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_maddubs_epi16() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        // Zero mask zeroes every lane.
        let r = _mm_maskz_maddubs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Lanes 0..=3 compute 1*1 + 1*1 = 2; lanes 4..=7 are zeroed.
        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
16790
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packs_epi32() {
        // packs saturates each i32 to i16 (i32::MAX -> i16::MAX) and, within
        // every 128-bit lane, emits 4 values from `a` then 4 from `b`.
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packs_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        // b's i32 pattern 1<<16|1 reads as all-1s when viewed as i16 lanes,
        // which makes it a convenient src for the masked call below.
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packs_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Mask bits 0..=3 select the packed lanes 0..=3 (saturated from a);
        // every other lane is copied from src (b), i.e. 1.
        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packs_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Mask bits 0..=3 keep the saturated lanes from a; the rest are zeroed.
        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }
16827
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        // b's i32 pattern 1<<16|1 reads as all-1s i16 lanes (src operand).
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packs_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Mask bits 0..=3 keep the saturated lanes from a; the rest take src.
        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packs_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Mask bits 0..=3 keep the saturated lanes from a; the rest are zeroed.
        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        // b's i32 pattern 1<<16|1 reads as all-1s i16 lanes (src operand).
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packs_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Mask bits 0..=3 keep the saturated lanes from a; the rest take src.
        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packs_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Mask bits 0..=3 keep the saturated lanes from a; the rest are zeroed.
        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
16873
    // i16->i8 saturating pack: i16::MAX saturates to i8::MAX, so each 128-bit
    // half of the result holds 8 saturated bytes from `a` then 8 bytes from `b`.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    // Masked variant: mask 0 leaves dst == src; mask 0b...00001111 writes the
    // packed result only to the four lowest byte lanes, keeping `b`'s bytes
    // (each half of 1 << 8 | 1 is 1) elsewhere.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packs_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: lanes with a clear mask bit become 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    // 256-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    // 128-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
16975
    // Unsigned i32->u16 saturating pack: negative inputs (-1) saturate to 0,
    // positive 1 stays 1; each 128-bit lane interleaves 4 from `a`, 4 from `b`.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packus_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Masked variant: clear mask bits keep `b`'s halves (each half of
    // 1 << 16 | 1 is 1); the four set bits write the saturated 0s.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packus_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: all-zero expectation since the selected lanes pack
    // -1 to 0 and the rest are zeroed by the mask.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packus_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // 256-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packus_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packus_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 128-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packus_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packus_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17056
    // Unsigned i16->u8 saturating pack: -1 saturates to 0, 1 stays 1; each
    // 128-bit lane holds 8 bytes from `a` (0) then 8 bytes from `b` (1).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Masked variant: clear bits keep the bytes of src `b` (each byte of
    // 1 << 8 | 1 is 1); the four set bits take the saturated 0s.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packus_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: result is all zero (selected lanes pack -1 to 0,
    // the rest are zeroed by the mask).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packus_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // 256-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packus_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packus_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 128-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packus_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packus_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17156
    // Unsigned 16-bit rounding average: avg(1, 1) == 1 in every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_avg_epu16(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    // Masked variant: since src == a == avg result, the output is all 1s
    // regardless of which lanes the mask selects.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_avg_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: only the four low lanes keep the average.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_avg_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    // 256-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_avg_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_avg_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    // 128-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_avg_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_avg_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
17235
    // Unsigned 8-bit rounding average: avg(1, 1) == 1 in every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_avg_epu8(a, b);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }

    // Masked variant: src == a == avg result, so the output is all 1s for any
    // mask value.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_avg_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
17264
17265    #[simd_test(enable = "avx512bw")]
17266    unsafe fn test_mm512_maskz_avg_epu8() {
17267        let a = _mm512_set1_epi8(1);
17268        let b = _mm512_set1_epi8(1);
17269        let r = _mm512_maskz_avg_epu8(0, a, b);
17270        assert_eq_m512i(r, _mm512_setzero_si512());
17271        let r = _mm512_maskz_avg_epu8(
17272            0b00000000_000000000_00000000_00000000_00000000_0000000_00000000_00001111,
17273            a,
17274            b,
17275        );
17276        #[rustfmt::skip]
17277        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17278                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17279                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17280                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17281        assert_eq_m512i(r, e);
17282    }
17283
    // 256-bit masked 8-bit average: src == a == avg result, so the output is
    // all 1s for any mask value.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_avg_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
17296
17297    #[simd_test(enable = "avx512bw,avx512vl")]
17298    unsafe fn test_mm256_maskz_avg_epu8() {
17299        let a = _mm256_set1_epi8(1);
17300        let b = _mm256_set1_epi8(1);
17301        let r = _mm256_maskz_avg_epu8(0, a, b);
17302        assert_eq_m256i(r, _mm256_setzero_si256());
17303        let r = _mm256_maskz_avg_epu8(0b00000000_0000000_00000000_00001111, a, b);
17304        #[rustfmt::skip]
17305        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17306                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17307        assert_eq_m256i(r, e);
17308    }
17309
    // 128-bit masked 8-bit average: src == a == avg result, so all 1s for any
    // mask value.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_avg_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked variant: only the four lowest byte lanes keep 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_avg_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
17331
    // Shift-left by a 128-bit count vector: shifting the top bit (1 << 15)
    // left by 2 shifts it out entirely, so every lane becomes 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_sll_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    // Masked variant: mask 0 keeps src; an all-ones mask yields the (all-zero)
    // shifted result.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_sll_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_sll_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    // 256-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_sll_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_sll_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    // 128-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sll_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sll_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17406
    // Shift-left by an immediate: shifting the top bit (1 << 15) left by 1
    // shifts it out, so every lane becomes 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_slli_epi16::<1>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    // Masked immediate-shift variant.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    // Zero-masked immediate-shift variant.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_slli_epi16::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    // 256-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_slli_epi16::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    // 128-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_slli_epi16::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17474
    // Per-lane variable shift-left: every count lane is 2, so the top bit
    // (1 << 15) is shifted out and every result lane is 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_sllv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    // Masked variable-shift variant.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variable-shift variant.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sllv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    // 256-bit unmasked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_sllv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    // 256-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sllv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    // 128-bit unmasked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_sllv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    // 128-bit masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked variant.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sllv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17567
    // vpsrlw takes the shift amount from the low 64 bits of `count`; here
    // set1_epi16(2) encodes 0x0002_0002_0002_0002, far above 15, so every
    // lane is shifted out to 0 (not 2 >> 2).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_srl_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17576
    // Mask 0 copies src; full mask applies the shift. The count register's
    // low 64 bits (0x0002_0002_0002_0002) exceed 15, so lanes become 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_srl_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17587
    // Zero mask zeroes all lanes; full mask applies the shift with an
    // oversized (> 15) count from the low 64 bits of `count`, also giving 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_srl_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17598
    // Mask 0 copies src; full mask shifts with an oversized (> 15) count
    // taken from the low 64 bits of `count`, zeroing every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_srl_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17609
    // Zero mask zeroes all lanes; full mask shifts with an oversized (> 15)
    // count from the low 64 bits of `count`, also zeroing every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_srl_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17620
    // Mask 0 copies src; full mask shifts with an oversized (> 15) count
    // from the low 64 bits of `count`, zeroing every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srl_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17631
    // Zero mask zeroes all lanes; full mask shifts with an oversized (> 15)
    // count from the low 64 bits of `count`, also zeroing every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srl_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17642
    // Immediate logical right shift: 2 >> 2 == 0 in every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_srli_epi16::<2>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17650
    // Mask 0 copies src; full mask applies 2 >> 2 == 0 in every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17660
    // Zero mask zeroes all lanes; full mask applies 2 >> 2 == 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_maskz_srli_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17670
    // Mask 0 copies src; full mask applies 2 >> 2 == 0 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17680
    // Zero mask zeroes all lanes; full mask applies 2 >> 2 == 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_maskz_srli_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17690
    // Mask 0 copies src; full mask applies 2 >> 2 == 0 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17700
    // Zero mask zeroes all lanes; full mask applies 2 >> 2 == 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_maskz_srli_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17710
    // Per-lane logical right shift: 2 >> 2 == 0 in every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srlv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17719
    // Mask 0 copies src; full mask applies per-lane 2 >> 2 == 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17730
    // Zero mask zeroes all lanes; full mask applies per-lane 2 >> 2 == 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srlv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17741
    // Per-lane logical right shift: 2 >> 2 == 0 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srlv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17750
    // Mask 0 copies src; full mask applies per-lane 2 >> 2 == 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17761
    // Zero mask zeroes all lanes; full mask applies per-lane 2 >> 2 == 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srlv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17772
    // Per-lane logical right shift: 2 >> 2 == 0 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_srlv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17781
    // Mask 0 copies src; full mask applies per-lane 2 >> 2 == 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17792
    // Zero mask zeroes all lanes; full mask applies per-lane 2 >> 2 == 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srlv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17803
    // vpsraw takes the shift amount from the low 64 bits of `count`; here
    // set1_epi16(1) encodes 0x0001_0001_0001_0001, far above 15, so the
    // positive value 8 is arithmetically shifted out to 0 (not 8 >> 1).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_sra_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17812
    // Mask 0 copies src; full mask shifts positive 8 by an oversized (> 15)
    // count from the low 64 bits of `count`, giving 0 in every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_mask_sra_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17823
    // Zero mask zeroes all lanes; full mask shifts positive 8 by an
    // oversized (> 15) count, also yielding 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_maskz_sra_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
17834
    // Mask 0 copies src; full mask shifts positive 8 by an oversized (> 15)
    // count from the low 64 bits of `count`, giving 0 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm256_mask_sra_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17845
    // Zero mask zeroes all lanes; full mask shifts positive 8 by an
    // oversized (> 15) count, also yielding 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm256_maskz_sra_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
17856
    // Mask 0 copies src; full mask shifts positive 8 by an oversized (> 15)
    // count from the low 64 bits of `count`, giving 0 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sra_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm_mask_sra_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17867
    // Zero mask zeroes all lanes; full mask shifts positive 8 by an
    // oversized (> 15) count, also yielding 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sra_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm_maskz_sra_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
17878
    // Immediate arithmetic right shift: 8 >> 2 == 2 in every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_srai_epi16::<2>(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
17886
    // Mask 0 copies src; full mask applies 8 >> 2 == 2 in every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
17896
    // Zero mask zeroes all lanes; full mask applies 8 >> 2 == 2.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_maskz_srai_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
17906
    // Mask 0 copies src; full mask applies 8 >> 2 == 2 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
17916
    // Zero mask zeroes all lanes; full mask applies 8 >> 2 == 2.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_maskz_srai_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
17926
    // Mask 0 copies src; full mask applies 8 >> 2 == 2 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
17936
    // Zero mask zeroes all lanes; full mask applies 8 >> 2 == 2.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_maskz_srai_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
17946
    // Per-lane arithmetic right shift: 8 >> 2 == 2 in every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srav_epi16(a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
17955
    // Mask 0 copies src; full mask applies per-lane 8 >> 2 == 2.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srav_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
17966
    // Zero mask zeroes all lanes; full mask applies per-lane 8 >> 2 == 2.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srav_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
17977
    // Per-lane arithmetic right shift: 8 >> 2 == 2 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srav_epi16(a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
17986
    // Mask 0 copies src; full mask applies per-lane 8 >> 2 == 2.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srav_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
17997
    // Zero mask zeroes all lanes; full mask applies per-lane 8 >> 2 == 2.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srav_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
18008
    // Per-lane arithmetic right shift: 8 >> 2 == 2 in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_srav_epi16(a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
18017
    // Mask 0 copies src; full mask applies per-lane 8 >> 2 == 2.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srav_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
18028
    // Zero mask zeroes all lanes; full mask applies per-lane 8 >> 2 == 2.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srav_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
18039
    // Each idx lane selects from the concatenation of a and b: bit 5 (1<<5)
    // picks from b (all 100), otherwise the low bits index into a. Lane i of
    // `a` holds 31 - i, so idx value k selects value 31 - k (idx 1 -> 30, etc.).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18057
    // Mask 0 copies src (`a`); full mask performs the two-source permute:
    // idx bit 5 picks from b (100), otherwise idx value k selects a's 31 - k.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18077
    // Zero mask zeroes all lanes; full mask performs the two-source permute:
    // idx bit 5 picks from b (100), otherwise idx value k selects a's 31 - k.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18097
    // mask2 variant: unselected lanes fall back to `idx` (hence r == idx for
    // mask 0); full mask performs the same two-source permute as above.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask2_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18117
    // 16-lane two-source permute: idx bit 4 (1<<4) picks from b (100);
    // otherwise idx value k selects a's lane holding 15 - k (idx 1 -> 14, etc.).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
18130
    // Mask 0 copies src (`a`); full mask performs the two-source permute
    // (idx bit 4 picks b's 100, otherwise idx k selects a's 15 - k).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
18145
    // Zero mask zeroes all lanes; full mask performs the two-source permute
    // (idx bit 4 picks b's 100, otherwise idx k selects a's 15 - k).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
18160
    // mask2 variant: unselected lanes fall back to `idx` (r == idx for mask
    // 0); full mask performs the same two-source permute as above.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
18176
    // 8-lane two-source permute: idx bit 3 (1<<3) picks from b (100);
    // otherwise idx value k selects a's lane holding 7 - k (idx 1 -> 6, etc.).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
18186
    // Mask 0 copies src (`a`); full mask performs the two-source permute
    // (idx bit 3 picks b's 100, otherwise idx k selects a's 7 - k).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
18198
    // Zero mask zeroes all lanes; full mask performs the two-source permute
    // (idx bit 3 picks b's 100, otherwise idx k selects a's 7 - k).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
18210
    // mask2 variant: unselected lanes fall back to `idx` (r == idx for mask
    // 0); full mask performs the same two-source permute as above.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
18222
    // All idx lanes are 1, so every output lane is a's element at index 1,
    // which holds 30 (set_epi16 lists values from the highest lane down).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
18233
    // Mask 0 copies src (`a`); full mask broadcasts a's element at index 1
    // (value 30) to every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
18246
    // Zero mask zeroes all lanes; full mask broadcasts a's element at index
    // 1 (value 30) to every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
18259
    // All idx lanes are 1: broadcasts a's element at index 1, which holds 14.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_permutexvar_epi16(idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }
18268
    // Mask 0 copies src (`a`); full mask broadcasts a's element at index 1
    // (value 14) to every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }
18279
    // Zero mask zeroes all lanes; full mask broadcasts a's element at index
    // 1 (value 14) to every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }
18290
18291    #[simd_test(enable = "avx512bw,avx512vl")]
18292    unsafe fn test_mm_permutexvar_epi16() {
18293        let idx = _mm_set1_epi16(1);
18294        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18295        let r = _mm_permutexvar_epi16(idx, a);
18296        let e = _mm_set1_epi16(6);
18297        assert_eq_m128i(r, e);
18298    }
18299
18300    #[simd_test(enable = "avx512bw,avx512vl")]
18301    unsafe fn test_mm_mask_permutexvar_epi16() {
18302        let idx = _mm_set1_epi16(1);
18303        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18304        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
18305        assert_eq_m128i(r, a);
18306        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
18307        let e = _mm_set1_epi16(6);
18308        assert_eq_m128i(r, e);
18309    }
18310
18311    #[simd_test(enable = "avx512bw,avx512vl")]
18312    unsafe fn test_mm_maskz_permutexvar_epi16() {
18313        let idx = _mm_set1_epi16(1);
18314        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18315        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
18316        assert_eq_m128i(r, _mm_setzero_si128());
18317        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
18318        let e = _mm_set1_epi16(6);
18319        assert_eq_m128i(r, e);
18320    }
18321
18322    #[simd_test(enable = "avx512bw")]
18323    unsafe fn test_mm512_mask_blend_epi16() {
18324        let a = _mm512_set1_epi16(1);
18325        let b = _mm512_set1_epi16(2);
18326        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
18327        #[rustfmt::skip]
18328        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
18329                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
18330        assert_eq_m512i(r, e);
18331    }
18332
18333    #[simd_test(enable = "avx512bw,avx512vl")]
18334    unsafe fn test_mm256_mask_blend_epi16() {
18335        let a = _mm256_set1_epi16(1);
18336        let b = _mm256_set1_epi16(2);
18337        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
18338        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
18339        assert_eq_m256i(r, e);
18340    }
18341
18342    #[simd_test(enable = "avx512bw,avx512vl")]
18343    unsafe fn test_mm_mask_blend_epi16() {
18344        let a = _mm_set1_epi16(1);
18345        let b = _mm_set1_epi16(2);
18346        let r = _mm_mask_blend_epi16(0b11110000, a, b);
18347        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
18348        assert_eq_m128i(r, e);
18349    }
18350
18351    #[simd_test(enable = "avx512bw")]
18352    unsafe fn test_mm512_mask_blend_epi8() {
18353        let a = _mm512_set1_epi8(1);
18354        let b = _mm512_set1_epi8(2);
18355        let r = _mm512_mask_blend_epi8(
18356            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
18357            a,
18358            b,
18359        );
18360        #[rustfmt::skip]
18361        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
18362                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
18363                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
18364                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
18365        assert_eq_m512i(r, e);
18366    }
18367
18368    #[simd_test(enable = "avx512bw,avx512vl")]
18369    unsafe fn test_mm256_mask_blend_epi8() {
18370        let a = _mm256_set1_epi8(1);
18371        let b = _mm256_set1_epi8(2);
18372        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
18373        #[rustfmt::skip]
18374        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
18375                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
18376        assert_eq_m256i(r, e);
18377    }
18378
18379    #[simd_test(enable = "avx512bw,avx512vl")]
18380    unsafe fn test_mm_mask_blend_epi8() {
18381        let a = _mm_set1_epi8(1);
18382        let b = _mm_set1_epi8(2);
18383        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
18384        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
18385        assert_eq_m128i(r, e);
18386    }
18387
18388    #[simd_test(enable = "avx512bw")]
18389    unsafe fn test_mm512_broadcastw_epi16() {
18390        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18391        let r = _mm512_broadcastw_epi16(a);
18392        let e = _mm512_set1_epi16(24);
18393        assert_eq_m512i(r, e);
18394    }
18395
18396    #[simd_test(enable = "avx512bw")]
18397    unsafe fn test_mm512_mask_broadcastw_epi16() {
18398        let src = _mm512_set1_epi16(1);
18399        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18400        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
18401        assert_eq_m512i(r, src);
18402        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
18403        let e = _mm512_set1_epi16(24);
18404        assert_eq_m512i(r, e);
18405    }
18406
18407    #[simd_test(enable = "avx512bw")]
18408    unsafe fn test_mm512_maskz_broadcastw_epi16() {
18409        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18410        let r = _mm512_maskz_broadcastw_epi16(0, a);
18411        assert_eq_m512i(r, _mm512_setzero_si512());
18412        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
18413        let e = _mm512_set1_epi16(24);
18414        assert_eq_m512i(r, e);
18415    }
18416
18417    #[simd_test(enable = "avx512bw,avx512vl")]
18418    unsafe fn test_mm256_mask_broadcastw_epi16() {
18419        let src = _mm256_set1_epi16(1);
18420        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18421        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
18422        assert_eq_m256i(r, src);
18423        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
18424        let e = _mm256_set1_epi16(24);
18425        assert_eq_m256i(r, e);
18426    }
18427
18428    #[simd_test(enable = "avx512bw,avx512vl")]
18429    unsafe fn test_mm256_maskz_broadcastw_epi16() {
18430        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18431        let r = _mm256_maskz_broadcastw_epi16(0, a);
18432        assert_eq_m256i(r, _mm256_setzero_si256());
18433        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
18434        let e = _mm256_set1_epi16(24);
18435        assert_eq_m256i(r, e);
18436    }
18437
18438    #[simd_test(enable = "avx512bw,avx512vl")]
18439    unsafe fn test_mm_mask_broadcastw_epi16() {
18440        let src = _mm_set1_epi16(1);
18441        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18442        let r = _mm_mask_broadcastw_epi16(src, 0, a);
18443        assert_eq_m128i(r, src);
18444        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
18445        let e = _mm_set1_epi16(24);
18446        assert_eq_m128i(r, e);
18447    }
18448
18449    #[simd_test(enable = "avx512bw,avx512vl")]
18450    unsafe fn test_mm_maskz_broadcastw_epi16() {
18451        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
18452        let r = _mm_maskz_broadcastw_epi16(0, a);
18453        assert_eq_m128i(r, _mm_setzero_si128());
18454        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
18455        let e = _mm_set1_epi16(24);
18456        assert_eq_m128i(r, e);
18457    }
18458
18459    #[simd_test(enable = "avx512bw")]
18460    unsafe fn test_mm512_broadcastb_epi8() {
18461        let a = _mm_set_epi8(
18462            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18463        );
18464        let r = _mm512_broadcastb_epi8(a);
18465        let e = _mm512_set1_epi8(32);
18466        assert_eq_m512i(r, e);
18467    }
18468
18469    #[simd_test(enable = "avx512bw")]
18470    unsafe fn test_mm512_mask_broadcastb_epi8() {
18471        let src = _mm512_set1_epi8(1);
18472        let a = _mm_set_epi8(
18473            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18474        );
18475        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
18476        assert_eq_m512i(r, src);
18477        let r = _mm512_mask_broadcastb_epi8(
18478            src,
18479            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18480            a,
18481        );
18482        let e = _mm512_set1_epi8(32);
18483        assert_eq_m512i(r, e);
18484    }
18485
18486    #[simd_test(enable = "avx512bw")]
18487    unsafe fn test_mm512_maskz_broadcastb_epi8() {
18488        let a = _mm_set_epi8(
18489            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18490        );
18491        let r = _mm512_maskz_broadcastb_epi8(0, a);
18492        assert_eq_m512i(r, _mm512_setzero_si512());
18493        let r = _mm512_maskz_broadcastb_epi8(
18494            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18495            a,
18496        );
18497        let e = _mm512_set1_epi8(32);
18498        assert_eq_m512i(r, e);
18499    }
18500
18501    #[simd_test(enable = "avx512bw,avx512vl")]
18502    unsafe fn test_mm256_mask_broadcastb_epi8() {
18503        let src = _mm256_set1_epi8(1);
18504        let a = _mm_set_epi8(
18505            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18506        );
18507        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
18508        assert_eq_m256i(r, src);
18509        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
18510        let e = _mm256_set1_epi8(32);
18511        assert_eq_m256i(r, e);
18512    }
18513
18514    #[simd_test(enable = "avx512bw,avx512vl")]
18515    unsafe fn test_mm256_maskz_broadcastb_epi8() {
18516        let a = _mm_set_epi8(
18517            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18518        );
18519        let r = _mm256_maskz_broadcastb_epi8(0, a);
18520        assert_eq_m256i(r, _mm256_setzero_si256());
18521        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
18522        let e = _mm256_set1_epi8(32);
18523        assert_eq_m256i(r, e);
18524    }
18525
18526    #[simd_test(enable = "avx512bw,avx512vl")]
18527    unsafe fn test_mm_mask_broadcastb_epi8() {
18528        let src = _mm_set1_epi8(1);
18529        let a = _mm_set_epi8(
18530            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18531        );
18532        let r = _mm_mask_broadcastb_epi8(src, 0, a);
18533        assert_eq_m128i(r, src);
18534        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
18535        let e = _mm_set1_epi8(32);
18536        assert_eq_m128i(r, e);
18537    }
18538
18539    #[simd_test(enable = "avx512bw,avx512vl")]
18540    unsafe fn test_mm_maskz_broadcastb_epi8() {
18541        let a = _mm_set_epi8(
18542            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18543        );
18544        let r = _mm_maskz_broadcastb_epi8(0, a);
18545        assert_eq_m128i(r, _mm_setzero_si128());
18546        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
18547        let e = _mm_set1_epi8(32);
18548        assert_eq_m128i(r, e);
18549    }
18550
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi16() {
        // Interleaves the upper four 16-bit words of every 128-bit lane of
        // `a` and `b`, which is visible in `e` as alternating (b, a) pairs
        // drawn from the high half of each lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
18565
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero writemask: the destination operand `a` is returned unchanged.
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full writemask: the high words of each 128-bit lane interleave.
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
18582
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask: the whole result is zeroed.
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask: the high words of each 128-bit lane interleave.
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
18599
18600    #[simd_test(enable = "avx512bw,avx512vl")]
18601    unsafe fn test_mm256_mask_unpackhi_epi16() {
18602        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18603        let b = _mm256_set_epi16(
18604            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18605        );
18606        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
18607        assert_eq_m256i(r, a);
18608        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
18609        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
18610        assert_eq_m256i(r, e);
18611    }
18612
18613    #[simd_test(enable = "avx512bw,avx512vl")]
18614    unsafe fn test_mm256_maskz_unpackhi_epi16() {
18615        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18616        let b = _mm256_set_epi16(
18617            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18618        );
18619        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
18620        assert_eq_m256i(r, _mm256_setzero_si256());
18621        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
18622        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
18623        assert_eq_m256i(r, e);
18624    }
18625
18626    #[simd_test(enable = "avx512bw,avx512vl")]
18627    unsafe fn test_mm_mask_unpackhi_epi16() {
18628        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18629        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18630        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
18631        assert_eq_m128i(r, a);
18632        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
18633        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
18634        assert_eq_m128i(r, e);
18635    }
18636
18637    #[simd_test(enable = "avx512bw,avx512vl")]
18638    unsafe fn test_mm_maskz_unpackhi_epi16() {
18639        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18640        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18641        let r = _mm_maskz_unpackhi_epi16(0, a, b);
18642        assert_eq_m128i(r, _mm_setzero_si128());
18643        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
18644        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
18645        assert_eq_m128i(r, e);
18646    }
18647
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi8() {
        // Interleaves the upper eight bytes of every 128-bit lane of `a` and
        // `b`, visible in `e` as alternating (b, a) pairs drawn from the high
        // half of each lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
18668
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero writemask: the destination operand `a` is returned unchanged.
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full writemask: the high bytes of each 128-bit lane interleave.
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
18696
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero mask: the whole result is zeroed.
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask: the high bytes of each 128-bit lane interleave.
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
18723
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero writemask: the destination operand `a` is returned unchanged.
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Full writemask: the high bytes of each 128-bit lane interleave.
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
18740
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero mask: the whole result is zeroed.
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: the high bytes of each 128-bit lane interleave.
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
18757
18758    #[simd_test(enable = "avx512bw,avx512vl")]
18759    unsafe fn test_mm_mask_unpackhi_epi8() {
18760        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18761        let b = _mm_set_epi8(
18762            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
18763        );
18764        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
18765        assert_eq_m128i(r, a);
18766        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
18767        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
18768        assert_eq_m128i(r, e);
18769    }
18770
18771    #[simd_test(enable = "avx512bw,avx512vl")]
18772    unsafe fn test_mm_maskz_unpackhi_epi8() {
18773        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18774        let b = _mm_set_epi8(
18775            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
18776        );
18777        let r = _mm_maskz_unpackhi_epi8(0, a, b);
18778        assert_eq_m128i(r, _mm_setzero_si128());
18779        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
18780        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
18781        assert_eq_m128i(r, e);
18782    }
18783
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpacklo_epi16() {
        // Interleaves the lower four 16-bit words of every 128-bit lane of
        // `a` and `b`, visible in `e` as alternating (b, a) pairs drawn from
        // the low half of each lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpacklo_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
18798
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero writemask: the destination operand `a` is returned unchanged.
        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full writemask: the low words of each 128-bit lane interleave.
        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
18815
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask: the whole result is zeroed.
        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask: the low words of each 128-bit lane interleave.
        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
18832
18833    #[simd_test(enable = "avx512bw,avx512vl")]
18834    unsafe fn test_mm256_mask_unpacklo_epi16() {
18835        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18836        let b = _mm256_set_epi16(
18837            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18838        );
18839        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
18840        assert_eq_m256i(r, a);
18841        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
18842        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
18843        assert_eq_m256i(r, e);
18844    }
18845
18846    #[simd_test(enable = "avx512bw,avx512vl")]
18847    unsafe fn test_mm256_maskz_unpacklo_epi16() {
18848        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18849        let b = _mm256_set_epi16(
18850            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18851        );
18852        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
18853        assert_eq_m256i(r, _mm256_setzero_si256());
18854        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
18855        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
18856        assert_eq_m256i(r, e);
18857    }
18858
18859    #[simd_test(enable = "avx512bw,avx512vl")]
18860    unsafe fn test_mm_mask_unpacklo_epi16() {
18861        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18862        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18863        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
18864        assert_eq_m128i(r, a);
18865        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
18866        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18867        assert_eq_m128i(r, e);
18868    }
18869
18870    #[simd_test(enable = "avx512bw,avx512vl")]
18871    unsafe fn test_mm_maskz_unpacklo_epi16() {
18872        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18873        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18874        let r = _mm_maskz_unpacklo_epi16(0, a, b);
18875        assert_eq_m128i(r, _mm_setzero_si128());
18876        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
18877        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18878        assert_eq_m128i(r, e);
18879    }
18880
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpacklo_epi8() {
        // Interleaves the lower eight bytes of every 128-bit lane of `a` and
        // `b`, visible in `e` as alternating (b, a) pairs drawn from the low
        // half of each lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
18901
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpacklo_epi8() {
        // Writemasked byte interleave of the low halves of each 128-bit lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero mask: every lane is copied from `src` (`a`).
        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // All-ones mask: result equals the unmasked unpacklo.
        let r = _mm512_mask_unpacklo_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
18929
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpacklo_epi8() {
        // Zero-masked byte interleave of the low halves of each 128-bit lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero mask: the whole result is zeroed.
        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // All-ones mask: result equals the unmasked unpacklo.
        let r = _mm512_maskz_unpacklo_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
18956
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi8() {
        // Writemasked byte interleave of the low halves of each 128-bit lane.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero mask keeps `src` (`a`) unchanged.
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Full mask yields the unmasked interleave.
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
18973
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi8() {
        // Zero-masked byte interleave of the low halves of each 128-bit lane.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero mask zeroes the whole result.
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask yields the unmasked interleave.
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
18990
18991    #[simd_test(enable = "avx512bw,avx512vl")]
18992    unsafe fn test_mm_mask_unpacklo_epi8() {
18993        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18994        let b = _mm_set_epi8(
18995            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
18996        );
18997        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
18998        assert_eq_m128i(r, a);
18999        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
19000        let e = _mm_set_epi8(
19001            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
19002        );
19003        assert_eq_m128i(r, e);
19004    }
19005
19006    #[simd_test(enable = "avx512bw,avx512vl")]
19007    unsafe fn test_mm_maskz_unpacklo_epi8() {
19008        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19009        let b = _mm_set_epi8(
19010            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19011        );
19012        let r = _mm_maskz_unpacklo_epi8(0, a, b);
19013        assert_eq_m128i(r, _mm_setzero_si128());
19014        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
19015        let e = _mm_set_epi8(
19016            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
19017        );
19018        assert_eq_m128i(r, e);
19019    }
19020
19021    #[simd_test(enable = "avx512bw")]
19022    unsafe fn test_mm512_mask_mov_epi16() {
19023        let src = _mm512_set1_epi16(1);
19024        let a = _mm512_set1_epi16(2);
19025        let r = _mm512_mask_mov_epi16(src, 0, a);
19026        assert_eq_m512i(r, src);
19027        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
19028        assert_eq_m512i(r, a);
19029    }
19030
19031    #[simd_test(enable = "avx512bw")]
19032    unsafe fn test_mm512_maskz_mov_epi16() {
19033        let a = _mm512_set1_epi16(2);
19034        let r = _mm512_maskz_mov_epi16(0, a);
19035        assert_eq_m512i(r, _mm512_setzero_si512());
19036        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
19037        assert_eq_m512i(r, a);
19038    }
19039
19040    #[simd_test(enable = "avx512bw,avx512vl")]
19041    unsafe fn test_mm256_mask_mov_epi16() {
19042        let src = _mm256_set1_epi16(1);
19043        let a = _mm256_set1_epi16(2);
19044        let r = _mm256_mask_mov_epi16(src, 0, a);
19045        assert_eq_m256i(r, src);
19046        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
19047        assert_eq_m256i(r, a);
19048    }
19049
19050    #[simd_test(enable = "avx512bw,avx512vl")]
19051    unsafe fn test_mm256_maskz_mov_epi16() {
19052        let a = _mm256_set1_epi16(2);
19053        let r = _mm256_maskz_mov_epi16(0, a);
19054        assert_eq_m256i(r, _mm256_setzero_si256());
19055        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
19056        assert_eq_m256i(r, a);
19057    }
19058
19059    #[simd_test(enable = "avx512bw,avx512vl")]
19060    unsafe fn test_mm_mask_mov_epi16() {
19061        let src = _mm_set1_epi16(1);
19062        let a = _mm_set1_epi16(2);
19063        let r = _mm_mask_mov_epi16(src, 0, a);
19064        assert_eq_m128i(r, src);
19065        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
19066        assert_eq_m128i(r, a);
19067    }
19068
19069    #[simd_test(enable = "avx512bw,avx512vl")]
19070    unsafe fn test_mm_maskz_mov_epi16() {
19071        let a = _mm_set1_epi16(2);
19072        let r = _mm_maskz_mov_epi16(0, a);
19073        assert_eq_m128i(r, _mm_setzero_si128());
19074        let r = _mm_maskz_mov_epi16(0b11111111, a);
19075        assert_eq_m128i(r, a);
19076    }
19077
19078    #[simd_test(enable = "avx512bw")]
19079    unsafe fn test_mm512_mask_mov_epi8() {
19080        let src = _mm512_set1_epi8(1);
19081        let a = _mm512_set1_epi8(2);
19082        let r = _mm512_mask_mov_epi8(src, 0, a);
19083        assert_eq_m512i(r, src);
19084        let r = _mm512_mask_mov_epi8(
19085            src,
19086            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19087            a,
19088        );
19089        assert_eq_m512i(r, a);
19090    }
19091
19092    #[simd_test(enable = "avx512bw")]
19093    unsafe fn test_mm512_maskz_mov_epi8() {
19094        let a = _mm512_set1_epi8(2);
19095        let r = _mm512_maskz_mov_epi8(0, a);
19096        assert_eq_m512i(r, _mm512_setzero_si512());
19097        let r = _mm512_maskz_mov_epi8(
19098            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19099            a,
19100        );
19101        assert_eq_m512i(r, a);
19102    }
19103
19104    #[simd_test(enable = "avx512bw,avx512vl")]
19105    unsafe fn test_mm256_mask_mov_epi8() {
19106        let src = _mm256_set1_epi8(1);
19107        let a = _mm256_set1_epi8(2);
19108        let r = _mm256_mask_mov_epi8(src, 0, a);
19109        assert_eq_m256i(r, src);
19110        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
19111        assert_eq_m256i(r, a);
19112    }
19113
19114    #[simd_test(enable = "avx512bw,avx512vl")]
19115    unsafe fn test_mm256_maskz_mov_epi8() {
19116        let a = _mm256_set1_epi8(2);
19117        let r = _mm256_maskz_mov_epi8(0, a);
19118        assert_eq_m256i(r, _mm256_setzero_si256());
19119        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
19120        assert_eq_m256i(r, a);
19121    }
19122
19123    #[simd_test(enable = "avx512bw,avx512vl")]
19124    unsafe fn test_mm_mask_mov_epi8() {
19125        let src = _mm_set1_epi8(1);
19126        let a = _mm_set1_epi8(2);
19127        let r = _mm_mask_mov_epi8(src, 0, a);
19128        assert_eq_m128i(r, src);
19129        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
19130        assert_eq_m128i(r, a);
19131    }
19132
19133    #[simd_test(enable = "avx512bw,avx512vl")]
19134    unsafe fn test_mm_maskz_mov_epi8() {
19135        let a = _mm_set1_epi8(2);
19136        let r = _mm_maskz_mov_epi8(0, a);
19137        assert_eq_m128i(r, _mm_setzero_si128());
19138        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
19139        assert_eq_m128i(r, a);
19140    }
19141
19142    #[simd_test(enable = "avx512bw")]
19143    unsafe fn test_mm512_mask_set1_epi16() {
19144        let src = _mm512_set1_epi16(2);
19145        let a: i16 = 11;
19146        let r = _mm512_mask_set1_epi16(src, 0, a);
19147        assert_eq_m512i(r, src);
19148        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
19149        let e = _mm512_set1_epi16(11);
19150        assert_eq_m512i(r, e);
19151    }
19152
19153    #[simd_test(enable = "avx512bw")]
19154    unsafe fn test_mm512_maskz_set1_epi16() {
19155        let a: i16 = 11;
19156        let r = _mm512_maskz_set1_epi16(0, a);
19157        assert_eq_m512i(r, _mm512_setzero_si512());
19158        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
19159        let e = _mm512_set1_epi16(11);
19160        assert_eq_m512i(r, e);
19161    }
19162
19163    #[simd_test(enable = "avx512bw,avx512vl")]
19164    unsafe fn test_mm256_mask_set1_epi16() {
19165        let src = _mm256_set1_epi16(2);
19166        let a: i16 = 11;
19167        let r = _mm256_mask_set1_epi16(src, 0, a);
19168        assert_eq_m256i(r, src);
19169        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
19170        let e = _mm256_set1_epi16(11);
19171        assert_eq_m256i(r, e);
19172    }
19173
19174    #[simd_test(enable = "avx512bw,avx512vl")]
19175    unsafe fn test_mm256_maskz_set1_epi16() {
19176        let a: i16 = 11;
19177        let r = _mm256_maskz_set1_epi16(0, a);
19178        assert_eq_m256i(r, _mm256_setzero_si256());
19179        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
19180        let e = _mm256_set1_epi16(11);
19181        assert_eq_m256i(r, e);
19182    }
19183
19184    #[simd_test(enable = "avx512bw,avx512vl")]
19185    unsafe fn test_mm_mask_set1_epi16() {
19186        let src = _mm_set1_epi16(2);
19187        let a: i16 = 11;
19188        let r = _mm_mask_set1_epi16(src, 0, a);
19189        assert_eq_m128i(r, src);
19190        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
19191        let e = _mm_set1_epi16(11);
19192        assert_eq_m128i(r, e);
19193    }
19194
19195    #[simd_test(enable = "avx512bw,avx512vl")]
19196    unsafe fn test_mm_maskz_set1_epi16() {
19197        let a: i16 = 11;
19198        let r = _mm_maskz_set1_epi16(0, a);
19199        assert_eq_m128i(r, _mm_setzero_si128());
19200        let r = _mm_maskz_set1_epi16(0b11111111, a);
19201        let e = _mm_set1_epi16(11);
19202        assert_eq_m128i(r, e);
19203    }
19204
19205    #[simd_test(enable = "avx512bw")]
19206    unsafe fn test_mm512_mask_set1_epi8() {
19207        let src = _mm512_set1_epi8(2);
19208        let a: i8 = 11;
19209        let r = _mm512_mask_set1_epi8(src, 0, a);
19210        assert_eq_m512i(r, src);
19211        let r = _mm512_mask_set1_epi8(
19212            src,
19213            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19214            a,
19215        );
19216        let e = _mm512_set1_epi8(11);
19217        assert_eq_m512i(r, e);
19218    }
19219
19220    #[simd_test(enable = "avx512bw")]
19221    unsafe fn test_mm512_maskz_set1_epi8() {
19222        let a: i8 = 11;
19223        let r = _mm512_maskz_set1_epi8(0, a);
19224        assert_eq_m512i(r, _mm512_setzero_si512());
19225        let r = _mm512_maskz_set1_epi8(
19226            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19227            a,
19228        );
19229        let e = _mm512_set1_epi8(11);
19230        assert_eq_m512i(r, e);
19231    }
19232
19233    #[simd_test(enable = "avx512bw,avx512vl")]
19234    unsafe fn test_mm256_mask_set1_epi8() {
19235        let src = _mm256_set1_epi8(2);
19236        let a: i8 = 11;
19237        let r = _mm256_mask_set1_epi8(src, 0, a);
19238        assert_eq_m256i(r, src);
19239        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
19240        let e = _mm256_set1_epi8(11);
19241        assert_eq_m256i(r, e);
19242    }
19243
19244    #[simd_test(enable = "avx512bw,avx512vl")]
19245    unsafe fn test_mm256_maskz_set1_epi8() {
19246        let a: i8 = 11;
19247        let r = _mm256_maskz_set1_epi8(0, a);
19248        assert_eq_m256i(r, _mm256_setzero_si256());
19249        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
19250        let e = _mm256_set1_epi8(11);
19251        assert_eq_m256i(r, e);
19252    }
19253
19254    #[simd_test(enable = "avx512bw,avx512vl")]
19255    unsafe fn test_mm_mask_set1_epi8() {
19256        let src = _mm_set1_epi8(2);
19257        let a: i8 = 11;
19258        let r = _mm_mask_set1_epi8(src, 0, a);
19259        assert_eq_m128i(r, src);
19260        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
19261        let e = _mm_set1_epi8(11);
19262        assert_eq_m128i(r, e);
19263    }
19264
19265    #[simd_test(enable = "avx512bw,avx512vl")]
19266    unsafe fn test_mm_maskz_set1_epi8() {
19267        let a: i8 = 11;
19268        let r = _mm_maskz_set1_epi8(0, a);
19269        assert_eq_m128i(r, _mm_setzero_si128());
19270        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
19271        let e = _mm_set1_epi8(11);
19272        assert_eq_m128i(r, e);
19273    }
19274
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflelo_epi16() {
        // IMM8 = 0b00_01_01_11 reorders the low four words of every 128-bit
        // lane; the high four words pass through unchanged.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
19290
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflelo_epi16() {
        // Writemasked variant of the low-quadword word shuffle.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        // Zero mask keeps `src` (`a`) unchanged.
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        // Full mask applies the shuffle to every lane.
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
19312
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflelo_epi16() {
        // Zero-masked variant of the low-quadword word shuffle.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        // Zero mask zeroes the whole result.
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask applies the shuffle to every lane.
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
19331
19332    #[simd_test(enable = "avx512bw,avx512vl")]
19333    unsafe fn test_mm256_mask_shufflelo_epi16() {
19334        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19335        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
19336        assert_eq_m256i(r, a);
19337        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
19338        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
19339        assert_eq_m256i(r, e);
19340    }
19341
19342    #[simd_test(enable = "avx512bw,avx512vl")]
19343    unsafe fn test_mm256_maskz_shufflelo_epi16() {
19344        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19345        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
19346        assert_eq_m256i(r, _mm256_setzero_si256());
19347        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
19348        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
19349        assert_eq_m256i(r, e);
19350    }
19351
19352    #[simd_test(enable = "avx512bw,avx512vl")]
19353    unsafe fn test_mm_mask_shufflelo_epi16() {
19354        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19355        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
19356        assert_eq_m128i(r, a);
19357        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
19358        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
19359        assert_eq_m128i(r, e);
19360    }
19361
19362    #[simd_test(enable = "avx512bw,avx512vl")]
19363    unsafe fn test_mm_maskz_shufflelo_epi16() {
19364        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19365        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
19366        assert_eq_m128i(r, _mm_setzero_si128());
19367        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
19368        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
19369        assert_eq_m128i(r, e);
19370    }
19371
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflehi_epi16() {
        // IMM8 = 0b00_01_01_11 reorders the high four words of every 128-bit
        // lane; the low four words pass through unchanged.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
19387
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflehi_epi16() {
        // Writemasked variant of the high-quadword word shuffle.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        // Zero mask keeps `src` (`a`) unchanged.
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        // Full mask applies the shuffle to every lane.
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
19409
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflehi_epi16() {
        // Zero-masked variant of the high-quadword word shuffle.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        // Zero mask zeroes the whole result.
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask applies the shuffle to every lane.
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
19428
19429    #[simd_test(enable = "avx512bw,avx512vl")]
19430    unsafe fn test_mm256_mask_shufflehi_epi16() {
19431        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19432        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
19433        assert_eq_m256i(r, a);
19434        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
19435        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
19436        assert_eq_m256i(r, e);
19437    }
19438
19439    #[simd_test(enable = "avx512bw,avx512vl")]
19440    unsafe fn test_mm256_maskz_shufflehi_epi16() {
19441        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19442        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
19443        assert_eq_m256i(r, _mm256_setzero_si256());
19444        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
19445        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
19446        assert_eq_m256i(r, e);
19447    }
19448
19449    #[simd_test(enable = "avx512bw,avx512vl")]
19450    unsafe fn test_mm_mask_shufflehi_epi16() {
19451        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19452        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
19453        assert_eq_m128i(r, a);
19454        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
19455        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
19456        assert_eq_m128i(r, e);
19457    }
19458
19459    #[simd_test(enable = "avx512bw,avx512vl")]
19460    unsafe fn test_mm_maskz_shufflehi_epi16() {
19461        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19462        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
19463        assert_eq_m128i(r, _mm_setzero_si128());
19464        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
19465        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
19466        assert_eq_m128i(r, e);
19467    }
19468
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shuffle_epi8() {
        // Control byte 1 in every position selects byte index 1 of each 128-bit
        // lane of `a`, so every output byte in a lane is that lane's value at
        // index 1 (14, 30, 46, 62 respectively).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
19485
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shuffle_epi8() {
        // Writemasked byte shuffle: control byte 1 selects byte index 1 of each
        // 128-bit lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        // Zero mask keeps `src` (`a`) unchanged.
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full mask yields the unmasked shuffle result.
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
19509
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shuffle_epi8() {
        // Zero-masked byte shuffle: control byte 1 selects byte index 1 of each
        // 128-bit lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        // Zero mask zeroes the whole result.
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask yields the unmasked shuffle result.
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
19532
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_epi8() {
        // Writemasked byte shuffle: control byte 1 selects byte index 1 of each
        // 128-bit lane.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        // Zero mask keeps `src` (`a`) unchanged.
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Full mask yields the unmasked shuffle result.
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
19547
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_epi8() {
        // Zero-masked byte shuffle: control byte 1 selects byte index 1 of each
        // 128-bit lane.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        // Zero mask zeroes the whole result.
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask yields the unmasked shuffle result.
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
19562
19563    #[simd_test(enable = "avx512bw,avx512vl")]
19564    unsafe fn test_mm_mask_shuffle_epi8() {
19565        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19566        let b = _mm_set1_epi8(1);
19567        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
19568        assert_eq_m128i(r, a);
19569        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
19570        let e = _mm_set_epi8(
19571            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19572        );
19573        assert_eq_m128i(r, e);
19574    }
19575
19576    #[simd_test(enable = "avx512bw,avx512vl")]
19577    unsafe fn test_mm_maskz_shuffle_epi8() {
19578        #[rustfmt::skip]
19579        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
19580        let b = _mm_set1_epi8(1);
19581        let r = _mm_maskz_shuffle_epi8(0, a, b);
19582        assert_eq_m128i(r, _mm_setzero_si128());
19583        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
19584        let e = _mm_set_epi8(
19585            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19586        );
19587        assert_eq_m128i(r, e);
19588    }
19589
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi16_mask() {
        // a & b == 1 (nonzero) in every element, so every mask bit is set.
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_test_epi16_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        // Zero input mask forces a zero result regardless of a & b.
        let r = _mm512_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        // All-ones input mask passes the full per-element test result through.
        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi16_mask() {
        // Same nonzero-AND setup as the 512-bit case, 16 elements wide.
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_test_epi16_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        // Zero input mask -> zero result; full mask -> all bits set.
        let r = _mm256_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi16_mask() {
        // 8 elements wide; a & b is nonzero everywhere.
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_test_epi16_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        // Zero input mask -> zero result; full mask -> all bits set.
        let r = _mm_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi8_mask() {
        // Byte variant: a & b == 1 in all 64 bytes -> 64 mask bits set.
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_test_epi8_mask(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        // Zero input mask -> zero result; full mask -> all 64 bits set.
        let r = _mm512_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi8_mask() {
        // 32 bytes wide; nonzero AND everywhere.
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_test_epi8_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        // Zero input mask -> zero result; full mask -> all bits set.
        let r = _mm256_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi8_mask() {
        // 16 bytes wide; nonzero AND everywhere.
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_test_epi8_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        // Zero input mask -> zero result; full mask -> all bits set.
        let r = _mm_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
19715
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi16_mask() {
        // testn sets a mask bit only when a & b == 0; here a & b == 1
        // everywhere, so the result is all-zero.
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi16_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        // Zero input mask -> zero result trivially.
        let r = _mm512_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        // Full mask: still zero because a & b is nonzero in every element.
        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi16_mask() {
        // a & b nonzero in all 16 elements -> zero testn mask.
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi16_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        // Result is zero for both the zero and the full input mask.
        let r = _mm256_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi16_mask() {
        // a & b nonzero in all 8 elements -> zero testn mask.
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_testn_epi16_mask(a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        // Result is zero for both the zero and the full input mask.
        let r = _mm_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi8_mask() {
        // Byte variant: a & b == 1 in all 64 bytes -> zero testn mask.
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi8_mask(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        // Result is zero for both the zero and the full input mask.
        let r = _mm512_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi8_mask() {
        // a & b nonzero in all 32 bytes -> zero testn mask.
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi8_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        // Result is zero for both the zero and the full input mask.
        let r = _mm256_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi8_mask() {
        // a & b nonzero in all 16 bytes -> zero testn mask.
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_testn_epi8_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        // Result is zero for both the zero and the full input mask.
        let r = _mm_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }
19841
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask64() {
        // Storing a 64-bit mask to memory must round-trip the bits unchanged.
        let a: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask64(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask32() {
        // Storing a 32-bit mask to memory must round-trip the bits unchanged.
        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask32(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask64() {
        // Loading a 64-bit mask from memory must reproduce the stored bits.
        let p: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let r = _load_mask64(&p);
        let e: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask32() {
        // Loading a 32-bit mask from memory must reproduce the stored bits.
        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let r = _load_mask32(&p);
        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }
19876
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sad_epu8() {
        // |2 - 4| = 2 per byte; each 64-bit lane sums eight bytes -> 16.
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_sad_epu8(a, b);
        let e = _mm512_set1_epi64(16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_dbsad_epu8() {
        // Each 16-bit result is a SAD over four byte pairs: 4 * |2 - 4| = 8.
        // IMM8 = 0 picks the same b quadruplet for every position.
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_dbsad_epu8::<0>(a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_dbsad_epu8() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        // Zero writemask keeps src; full mask computes 4 * |2 - 4| = 8.
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        // Zero mask zeroes the result; full mask computes 4 * |2 - 4| = 8.
        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_dbsad_epu8() {
        // 256-bit variant of the same 4 * |2 - 4| = 8 computation.
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_dbsad_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_dbsad_epu8() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        // Zero writemask keeps src; full mask computes the SADs.
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        // Zero mask zeroes the result; full mask computes the SADs.
        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_dbsad_epu8() {
        // 128-bit variant of the same 4 * |2 - 4| = 8 computation.
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_dbsad_epu8::<0>(a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_dbsad_epu8() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        // Zero writemask keeps src; full mask computes the SADs.
        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        // Zero mask zeroes the result; full mask computes the SADs.
        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }
19981
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi16_mask() {
        // Every element has its sign bit (bit 15) set, so all 32 mask bits are 1.
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_movepi16_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi16_mask() {
        // Sign bit set in all 16 elements -> all mask bits set.
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_movepi16_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi16_mask() {
        // Sign bit set in all 8 elements -> all mask bits set.
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_movepi16_mask(a);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi8_mask() {
        // Every byte has its sign bit (bit 7) set, so all 64 mask bits are 1.
        let a = _mm512_set1_epi8(1 << 7);
        let r = _mm512_movepi8_mask(a);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi8_mask() {
        // Sign bit set in all 32 bytes -> all mask bits set.
        let a = _mm256_set1_epi8(1 << 7);
        let r = _mm256_movepi8_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi8_mask() {
        // Sign bit set in all 16 bytes -> all mask bits set.
        let a = _mm_set1_epi8(1 << 7);
        let r = _mm_movepi8_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
20030
20031    #[simd_test(enable = "avx512bw")]
20032    unsafe fn test_mm512_movm_epi16() {
20033        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
20034        let r = _mm512_movm_epi16(a);
20035        let e = _mm512_set1_epi16(
20036            1 << 15
20037                | 1 << 14
20038                | 1 << 13
20039                | 1 << 12
20040                | 1 << 11
20041                | 1 << 10
20042                | 1 << 9
20043                | 1 << 8
20044                | 1 << 7
20045                | 1 << 6
20046                | 1 << 5
20047                | 1 << 4
20048                | 1 << 3
20049                | 1 << 2
20050                | 1 << 1
20051                | 1 << 0,
20052        );
20053        assert_eq_m512i(r, e);
20054    }
20055
20056    #[simd_test(enable = "avx512bw,avx512vl")]
20057    unsafe fn test_mm256_movm_epi16() {
20058        let a: __mmask16 = 0b11111111_11111111;
20059        let r = _mm256_movm_epi16(a);
20060        let e = _mm256_set1_epi16(
20061            1 << 15
20062                | 1 << 14
20063                | 1 << 13
20064                | 1 << 12
20065                | 1 << 11
20066                | 1 << 10
20067                | 1 << 9
20068                | 1 << 8
20069                | 1 << 7
20070                | 1 << 6
20071                | 1 << 5
20072                | 1 << 4
20073                | 1 << 3
20074                | 1 << 2
20075                | 1 << 1
20076                | 1 << 0,
20077        );
20078        assert_eq_m256i(r, e);
20079    }
20080
20081    #[simd_test(enable = "avx512bw,avx512vl")]
20082    unsafe fn test_mm_movm_epi16() {
20083        let a: __mmask8 = 0b11111111;
20084        let r = _mm_movm_epi16(a);
20085        let e = _mm_set1_epi16(
20086            1 << 15
20087                | 1 << 14
20088                | 1 << 13
20089                | 1 << 12
20090                | 1 << 11
20091                | 1 << 10
20092                | 1 << 9
20093                | 1 << 8
20094                | 1 << 7
20095                | 1 << 6
20096                | 1 << 5
20097                | 1 << 4
20098                | 1 << 3
20099                | 1 << 2
20100                | 1 << 1
20101                | 1 << 0,
20102        );
20103        assert_eq_m128i(r, e);
20104    }
20105
20106    #[simd_test(enable = "avx512bw")]
20107    unsafe fn test_mm512_movm_epi8() {
20108        let a: __mmask64 =
20109            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20110        let r = _mm512_movm_epi8(a);
20111        let e =
20112            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
20113        assert_eq_m512i(r, e);
20114    }
20115
20116    #[simd_test(enable = "avx512bw,avx512vl")]
20117    unsafe fn test_mm256_movm_epi8() {
20118        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
20119        let r = _mm256_movm_epi8(a);
20120        let e =
20121            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
20122        assert_eq_m256i(r, e);
20123    }
20124
20125    #[simd_test(enable = "avx512bw,avx512vl")]
20126    unsafe fn test_mm_movm_epi8() {
20127        let a: __mmask16 = 0b11111111_11111111;
20128        let r = _mm_movm_epi8(a);
20129        let e =
20130            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
20131        assert_eq_m128i(r, e);
20132    }
20133
20134    #[simd_test(enable = "avx512bw")]
20135    unsafe fn test_cvtmask32_u32() {
20136        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
20137        let r = _cvtmask32_u32(a);
20138        let e: u32 = 0b11001100_00110011_01100110_10011001;
20139        assert_eq!(r, e);
20140    }
20141
20142    #[simd_test(enable = "avx512bw")]
20143    unsafe fn test_cvtu32_mask32() {
20144        let a: u32 = 0b11001100_00110011_01100110_10011001;
20145        let r = _cvtu32_mask32(a);
20146        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
20147        assert_eq!(r, e);
20148    }
20149
20150    #[simd_test(enable = "avx512bw")]
20151    unsafe fn test_kadd_mask32() {
20152        let a: __mmask32 = 11;
20153        let b: __mmask32 = 22;
20154        let r = _kadd_mask32(a, b);
20155        let e: __mmask32 = 33;
20156        assert_eq!(r, e);
20157    }
20158
20159    #[simd_test(enable = "avx512bw")]
20160    unsafe fn test_kadd_mask64() {
20161        let a: __mmask64 = 11;
20162        let b: __mmask64 = 22;
20163        let r = _kadd_mask64(a, b);
20164        let e: __mmask64 = 33;
20165        assert_eq!(r, e);
20166    }
20167
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask32() {
        // a == b, so a AND b returns the same pattern.
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kand_mask32(a, b);
        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask64() {
        // a == b, so a AND b returns the same pattern.
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kand_mask64(a, b);
        let e: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask32() {
        // Bitwise complement of every mask bit.
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _knot_mask32(a);
        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask64() {
        // Bitwise complement of every mask bit.
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _knot_mask64(a);
        let e: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask32() {
        // kandn computes NOT(a) AND b; with a == b that is always zero.
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kandn_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask64() {
        // kandn computes NOT(a) AND b; with a == b that is always zero.
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kandn_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask32() {
        // a and b are bitwise complements, so their OR is all-ones.
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask64() {
        // a and b are bitwise complements, so their OR is all-ones.
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask32() {
        // a and b are bitwise complements, so their XOR is all-ones.
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask64() {
        // a and b are bitwise complements, so their XOR is all-ones.
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask32() {
        // XNOR of complementary patterns: every bit pair differs, so all zero.
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxnor_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask64() {
        // XNOR of complementary patterns: every bit pair differs, so all zero.
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxnor_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
20290
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask32_u8() {
        // a | b covers all 32 bits, so the carry ("all ones") output is 1;
        // the return value is the zero flag (OR == 0), which is 0 here.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask32_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask64_u8() {
        // Only the low 32 of 64 bits are set in a | b, so "all ones" is 0;
        // the OR is still nonzero, so the zero-flag return is 0 too.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask64_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask32_u8() {
        // a | b is all-ones across 32 bits -> carry flag is 1.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask64_u8() {
        // The upper 32 of 64 bits are zero, so a | b is not all-ones -> 0.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask32_u8() {
        // a | b is nonzero, so the zero flag is 0.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask64_u8() {
        // a | b is nonzero, so the zero flag is 0.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask64_u8(a, b);
        assert_eq!(r, 0);
    }
20342
    // KSHIFTL on a 32-bit mask: bits shifted past bit 31 are discarded,
    // and any shift count >= 32 yields an all-zero mask (no wrap-around).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftli_mask32() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask32::<3>(a);
        let e: __mmask32 = 0b0100101101001011_0100101101001000;
        assert_eq!(r, e);

        // Only the lowest set bit of `a` survives a shift by 31.
        let r = _kshiftli_mask32::<31>(a);
        let e: __mmask32 = 0b1000000000000000_0000000000000000;
        assert_eq!(r, e);

        // Shift count equal to the mask width clears everything.
        let r = _kshiftli_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        // Counts beyond the width also yield zero (no modular behavior).
        let r = _kshiftli_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    // KSHIFTL on a 64-bit mask: a 32-bit pattern shifted left by 3 fits
    // entirely within 64 bits, so no bits are lost in the first case.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftli_mask64() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask64::<3>(a);
        let e: __mmask64 = 0b0110100101101001011_0100101101001000;
        assert_eq!(r, e);

        // Only bit 0 of `a` survives a shift by 63.
        let r = _kshiftli_mask64::<63>(a);
        let e: __mmask64 = 0b1000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        // Counts >= 64 clear the whole mask.
        let r = _kshiftli_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    // KSHIFTR on a 32-bit mask: logical right shift, zero-filling from the
    // top; counts >= 32 clear the mask.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftri_mask32() {
        let a: __mmask32 = 0b1010100101101001_0110100101101001;
        let r = _kshiftri_mask32::<3>(a);
        let e: __mmask32 = 0b0001010100101101_0010110100101101;
        assert_eq!(r, e);

        // Only the top bit of `a` survives a shift by 31.
        let r = _kshiftri_mask32::<31>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    // KSHIFTR on a 64-bit mask. `a` is a 35-bit pattern, so shifting right
    // by 34 leaves just its top bit, and by 35 (or >= 64) leaves zero.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftri_mask64() {
        let a: __mmask64 = 0b1010100101101001011_0100101101001000;
        let r = _kshiftri_mask64::<3>(a);
        let e: __mmask64 = 0b1010100101101001_0110100101101001;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<34>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<35>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }
20426
    // KTEST on 32-bit masks. `b` is the bitwise complement of `a`, so
    // a & b == 0 (zero flag / return value is 1) while !a & b == b != 0
    // (the and-not output is 0).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask32_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    // !a & b is non-zero (b == !a here), so the carry result is 0.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    // a & b == 0 for complementary operands, so the zero result is 1.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    // Same operand bits in 64-bit masks: a & b is still 0, and
    // !a & b is non-zero (the upper 32 bits of !a are all ones,
    // but b's low bits already make it non-zero).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask64_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask64_u8(a, b);
        assert_eq!(r, 1);
    }
20478
    // KUNPCKWD: the result is the low 16 bits of `a` (0x0011) placed above
    // the low 16 bits of `b` (0x1011); the upper halves of both inputs are
    // discarded.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackw() {
        let a: u32 = 0x00110011;
        let b: u32 = 0x00001011;
        let r = _mm512_kunpackw(a, b);
        let e: u32 = 0x00111011;
        assert_eq!(r, e);
    }

    // KUNPCKDQ: likewise, the low 32 bits of `a` (0x00110011) are placed
    // above the low 32 bits of `b` (0x00001011).
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackd() {
        let a: u64 = 0x11001100_00110011;
        let b: u64 = 0x00101110_00001011;
        let r = _mm512_kunpackd(a, b);
        let e: u64 = 0x00110011_00001011;
        assert_eq!(r, e);
    }
20496
    // Truncating narrowing (VPMOVWB): each 16-bit lane is truncated to its
    // low 8 bits; the value 2 is small enough to pass through unchanged.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_cvtepi16_epi8(a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    // Masked variant: a zero mask copies `src` through; an all-ones mask
    // performs the conversion on every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    // Zero-masked variant: a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_cvtepi16_epi8(a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    // 128-bit source: eight 16-bit lanes narrow into the low 8 bytes of the
    // result; the upper 8 bytes are zeroed.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
20583
    // Signed-saturating narrowing (VPMOVSWB): i16::MAX exceeds the i8
    // range, so every lane saturates to i8::MAX.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_cvtsepi16_epi8(a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    // Masked variant: zero mask passes `src` through; all-ones mask
    // saturates every lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_cvtsepi16_epi8(a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    // 128-bit source: results land in the low 8 bytes; upper 8 are zeroed.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }
20673
    // Unsigned-saturating narrowing (VPMOVUSWB): i16::MIN viewed as u16 is
    // 0x8000, which exceeds u8::MAX and saturates to 0xFF — read back as
    // -1 through the signed i8 comparison helpers.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_cvtusepi16_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    // Masked variant: zero mask passes `src` through; all-ones mask
    // saturates every lane to 0xFF.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_cvtusepi16_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    // 128-bit source: results land in the low 8 bytes; upper 8 are zeroed.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
20760
    // Sign-extending widening (VPMOVSXBW): each 8-bit lane widens to 16
    // bits; the small positive value 2 is unchanged.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepi8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    // Masked variant: zero mask copies `src`; all-ones mask widens every
    // lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
20831
    // Zero-extending widening (VPMOVZXBW): each 8-bit lane widens to 16
    // bits with zero fill; 2 is unchanged.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepu8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    // Masked variant: zero mask copies `src`; all-ones mask widens every
    // lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepu8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
20902
    // Byte shift left within each 128-bit lane: shifting by 9 bytes moves
    // the marker byte at lane offset 3 up to offset 12 and zero-fills the
    // vacated low bytes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bslli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    // Byte shift right within each 128-bit lane: shifting by 3 bytes
    // zero-fills the top 3 bytes of every lane and drops the bottom 3.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bsrli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }
20942
    // VPALIGNR: per 128-bit lane, concatenate the lane of `a` above the
    // lane of `b` and shift the 32-byte pair right by IMM bytes; with
    // IMM=14 the result is b's top 2 bytes followed by a's low 14 bytes.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    // Masked variant: zero mask returns the pass-through operand `a`
    // unchanged; all-ones mask computes the full alignr result.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }
21078
    // Masked saturating store: with an all-ones mask every output byte is
    // written, so starting from an undefined destination is sound — the
    // whole 256-bit buffer is overwritten with the saturated result.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtsepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    // 128-bit source stores only 8 bytes, so the buffer is zero-initialized
    // here and the upper 8 bytes are expected to remain zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
21113
21114    #[simd_test(enable = "avx512bw")]
21115    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
21116        let a = _mm512_set1_epi16(8);
21117        let mut r = _mm256_undefined_si256();
21118        _mm512_mask_cvtepi16_storeu_epi8(
21119            &mut r as *mut _ as *mut i8,
21120            0b11111111_11111111_11111111_11111111,
21121            a,
21122        );
21123        let e = _mm256_set1_epi8(8);
21124        assert_eq_m256i(r, e);
21125    }
21126
21127    #[simd_test(enable = "avx512bw,avx512vl")]
21128    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
21129        let a = _mm256_set1_epi16(8);
21130        let mut r = _mm_undefined_si128();
21131        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
21132        let e = _mm_set1_epi8(8);
21133        assert_eq_m128i(r, e);
21134    }
21135
21136    #[simd_test(enable = "avx512bw,avx512vl")]
21137    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
21138        let a = _mm_set1_epi16(8);
21139        let mut r = _mm_set1_epi8(0);
21140        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
21141        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
21142        assert_eq_m128i(r, e);
21143    }
21144
21145    #[simd_test(enable = "avx512bw")]
21146    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
21147        let a = _mm512_set1_epi16(i16::MAX);
21148        let mut r = _mm256_undefined_si256();
21149        _mm512_mask_cvtusepi16_storeu_epi8(
21150            &mut r as *mut _ as *mut i8,
21151            0b11111111_11111111_11111111_11111111,
21152            a,
21153        );
21154        let e = _mm256_set1_epi8(u8::MAX as i8);
21155        assert_eq_m256i(r, e);
21156    }
21157
21158    #[simd_test(enable = "avx512bw,avx512vl")]
21159    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
21160        let a = _mm256_set1_epi16(i16::MAX);
21161        let mut r = _mm_undefined_si128();
21162        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
21163        let e = _mm_set1_epi8(u8::MAX as i8);
21164        assert_eq_m128i(r, e);
21165    }
21166
21167    #[simd_test(enable = "avx512bw,avx512vl")]
21168    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
21169        let a = _mm_set1_epi16(i16::MAX);
21170        let mut r = _mm_set1_epi8(0);
21171        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
21172        #[rustfmt::skip]
21173        let e = _mm_set_epi8(
21174            0, 0, 0, 0,
21175            0, 0, 0, 0,
21176            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, 
21177            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
21178        );
21179        assert_eq_m128i(r, e);
21180    }
21181}