core/stdarch/crates/core_arch/src/x86/avx512f.rs

use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}

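// A minimal usage sketch (the names `abs_low_half` and `demo` are hypothetical, shown only
// to illustrate the writemask/zeromask convention used throughout this module): lanes whose
// mask bit is set receive the computed result, the rest are copied from `src` (writemask)
// or zeroed (zeromask).
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     fn abs_low_half(src: __m512i, a: __m512i) -> __m512i {
//         // Lanes 0..8 get |a|; lanes 8..16 are copied unchanged from `src`.
//         _mm512_mask_abs_epi32(src, 0b0000_0000_1111_1111, a)
//     }
//
//     fn demo() {
//         if is_x86_feature_detected!("avx512f") {
//             // SAFETY: `avx512f` support was verified at runtime just above.
//             let r = unsafe { abs_low_half(_mm512_set1_epi32(7), _mm512_set1_epi32(-1)) };
//             let lanes: [i32; 16] = unsafe { core::mem::transmute(r) };
//             assert_eq!(lanes[0], 1); // masked-in lane: |-1|
//             assert_eq!(lanes[15], 7); // masked-out lane: copied from `src`
//         }
//     }
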
/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

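// The floating-point variants lower to a plain bitwise AND (`vpandd`/`vpandq`): for
// IEEE-754 values, |x| only clears the sign bit, so no arithmetic is needed. A hedged
// sketch of the masked form (the helper name is hypothetical):
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     fn abs_even_lanes(v: __m512) -> __m512 {
//         // Even lanes become |v|; odd lanes keep their original (possibly negative) value.
//         _mm512_mask_abs_ps(v, 0b0101_0101_0101_0101, v)
//     }
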
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}

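// The masked move is effectively a per-lane blend: lane `i` of the result comes from
// `a` when bit `i` of `k` is set and from `src` (or zero in the maskz form) otherwise.
// A hedged sketch (the helper name is hypothetical):
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     fn blend_epi32(src: __m512i, a: __m512i, k: __mmask16) -> __m512i {
//         _mm512_mask_mov_epi32(src, k, a)
//     }
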
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}

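// The packed integer addition is wrapping and, as implemented above, is computed for every
// lane; the mask only decides which results are written back. A hedged sketch (the helper
// name is hypothetical):
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     fn add_selected(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
//         // Lanes with a set bit in `k` receive a + b; the rest are copied from `src`.
//         _mm512_mask_add_epi32(src, k, a, b)
//     }
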
/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
    }
}

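// As implemented above, the single-precision forms compute the full-width sum first and
// then let the mask select between that sum and `src` (or zero). A hedged sketch (the
// helper name is hypothetical):
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     fn add_low_four(src: __m512, a: __m512, b: __m512) -> __m512 {
//         // Only lanes 0..4 receive a + b; lanes 4..16 are copied from `src`.
//         _mm512_mask_add_ps(src, 0b0000_0000_0000_1111, a, b)
//     }
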
/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
    }
}

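// Mask registers usually come from the AVX-512 compare intrinsics. A hedged sketch that
// keeps `a` wherever `a <= b` and computes `a - b` elsewhere (the helper name is
// hypothetical):
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     fn sub_where_greater(a: __m512i, b: __m512i) -> __m512i {
//         // Compute a - b only in lanes where a > b (signed); other lanes keep `a`.
//         let k = _mm512_cmpgt_epi32_mask(a, b);
//         _mm512_mask_sub_epi32(a, k, a, b)
//     }
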
/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1196///
1197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
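///
/// A minimal usage sketch, assuming runtime feature detection via `std` and the
/// `_mm512_set1_ps` and `_mm512_storeu_ps` helpers for setup and inspection:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support has been verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(5.5);
///         let b = _mm512_set1_ps(2.0);
///         let r = _mm512_sub_ps(a, b);
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out, [3.5f32; 16]);
///     }
/// }
/// # }
/// ```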
1198#[inline]
1199#[target_feature(enable = "avx512f")]
1200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1201#[cfg_attr(test, assert_instr(vsubps))]
1202pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1203    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
1204}
1205
1206/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1212#[cfg_attr(test, assert_instr(vsubps))]
1213pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1214    unsafe {
1215        let sub = _mm512_sub_ps(a, b).as_f32x16();
1216        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1217    }
1218}
1219
1220/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1221///
1222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1223#[inline]
1224#[target_feature(enable = "avx512f")]
1225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1226#[cfg_attr(test, assert_instr(vsubps))]
1227pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1228    unsafe {
1229        let sub = _mm512_sub_ps(a, b).as_f32x16();
1230        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
1231    }
1232}
1233
1234/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1240#[cfg_attr(test, assert_instr(vsubps))]
1241pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1242    unsafe {
1243        let sub = _mm256_sub_ps(a, b).as_f32x8();
1244        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1245    }
1246}
1247
1248/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1251#[inline]
1252#[target_feature(enable = "avx512f,avx512vl")]
1253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1254#[cfg_attr(test, assert_instr(vsubps))]
1255pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1256    unsafe {
1257        let sub = _mm256_sub_ps(a, b).as_f32x8();
1258        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
1259    }
1260}
1261
1262/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1263///
1264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1265#[inline]
1266#[target_feature(enable = "avx512f,avx512vl")]
1267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1268#[cfg_attr(test, assert_instr(vsubps))]
1269pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1270    unsafe {
1271        let sub = _mm_sub_ps(a, b).as_f32x4();
1272        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1273    }
1274}
1275
1276/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1279#[inline]
1280#[target_feature(enable = "avx512f,avx512vl")]
1281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1282#[cfg_attr(test, assert_instr(vsubps))]
1283pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1284    unsafe {
1285        let sub = _mm_sub_ps(a, b).as_f32x4();
1286        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
1287    }
1288}
1289
1290/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1291///
1292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
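///
/// A minimal usage sketch, assuming runtime feature detection via `std` and the
/// `_mm512_set1_pd` and `_mm512_storeu_pd` helpers for setup and inspection:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support has been verified at runtime.
///     unsafe {
///         let a = _mm512_set1_pd(1.25);
///         let b = _mm512_set1_pd(0.25);
///         let r = _mm512_sub_pd(a, b);
///         let mut out = [0.0f64; 8];
///         _mm512_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [1.0f64; 8]);
///     }
/// }
/// # }
/// ```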
1293#[inline]
1294#[target_feature(enable = "avx512f")]
1295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1296#[cfg_attr(test, assert_instr(vsubpd))]
1297pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1298    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
1299}
1300
1301/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1302///
1303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1304#[inline]
1305#[target_feature(enable = "avx512f")]
1306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1307#[cfg_attr(test, assert_instr(vsubpd))]
1308pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1309    unsafe {
1310        let sub = _mm512_sub_pd(a, b).as_f64x8();
1311        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1312    }
1313}
1314
1315/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1316///
1317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1318#[inline]
1319#[target_feature(enable = "avx512f")]
1320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1321#[cfg_attr(test, assert_instr(vsubpd))]
1322pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1323    unsafe {
1324        let sub = _mm512_sub_pd(a, b).as_f64x8();
1325        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
1326    }
1327}
1328
1329/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1332#[inline]
1333#[target_feature(enable = "avx512f,avx512vl")]
1334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1335#[cfg_attr(test, assert_instr(vsubpd))]
1336pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1337    unsafe {
1338        let sub = _mm256_sub_pd(a, b).as_f64x4();
1339        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1340    }
1341}
1342
1343/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1344///
1345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1346#[inline]
1347#[target_feature(enable = "avx512f,avx512vl")]
1348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1349#[cfg_attr(test, assert_instr(vsubpd))]
1350pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1351    unsafe {
1352        let sub = _mm256_sub_pd(a, b).as_f64x4();
1353        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
1354    }
1355}
1356
1357/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1358///
1359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1360#[inline]
1361#[target_feature(enable = "avx512f,avx512vl")]
1362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1363#[cfg_attr(test, assert_instr(vsubpd))]
1364pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1365    unsafe {
1366        let sub = _mm_sub_pd(a, b).as_f64x2();
1367        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1368    }
1369}
1370
1371/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1374#[inline]
1375#[target_feature(enable = "avx512f,avx512vl")]
1376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1377#[cfg_attr(test, assert_instr(vsubpd))]
1378pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1379    unsafe {
1380        let sub = _mm_sub_pd(a, b).as_f64x2();
1381        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
1382    }
1383}
1384
1385/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1386///
1387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
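///
/// A minimal sketch of the low-dword semantics, assuming runtime feature detection
/// via `std` and the `_mm512_set1_epi64` and `_mm512_storeu_si512` helpers:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support has been verified at runtime.
///     unsafe {
///         // Each 64-bit lane of `a` holds -4 in its low 32 bits; the high 32 bits are ignored.
///         let a = _mm512_set1_epi64(0xFFFF_FFFC);
///         let b = _mm512_set1_epi64(3);
///         let r = _mm512_mul_epi32(a, b);
///         let mut out = [0i64; 8];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         // The low dwords are multiplied as signed values: -4 * 3 = -12.
///         assert_eq!(out, [-12i64; 8]);
///     }
/// }
/// # }
/// ```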
1388#[inline]
1389#[target_feature(enable = "avx512f")]
1390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1391#[cfg_attr(test, assert_instr(vpmuldq))]
1392pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1393    unsafe {
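        // Truncate each 64-bit lane to its low 32 bits, then sign-extend back to 64 bits
        // so that the multiply below operates on the sign-extended low dwords only.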
1394        let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1395        let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
1396        transmute(simd_mul(a, b))
1397    }
1398}
1399
1400/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1401///
1402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1403#[inline]
1404#[target_feature(enable = "avx512f")]
1405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1406#[cfg_attr(test, assert_instr(vpmuldq))]
1407pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1408    unsafe {
1409        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1410        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1411    }
1412}
1413
1414/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1415///
1416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1417#[inline]
1418#[target_feature(enable = "avx512f")]
1419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1420#[cfg_attr(test, assert_instr(vpmuldq))]
1421pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1422    unsafe {
1423        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1424        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
1425    }
1426}
1427
1428/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1429///
1430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1431#[inline]
1432#[target_feature(enable = "avx512f,avx512vl")]
1433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1434#[cfg_attr(test, assert_instr(vpmuldq))]
1435pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1436    unsafe {
1437        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1438        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1439    }
1440}
1441
1442/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1443///
1444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1445#[inline]
1446#[target_feature(enable = "avx512f,avx512vl")]
1447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1448#[cfg_attr(test, assert_instr(vpmuldq))]
1449pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1450    unsafe {
1451        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1452        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
1453    }
1454}
1455
1456/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1459#[inline]
1460#[target_feature(enable = "avx512f,avx512vl")]
1461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1462#[cfg_attr(test, assert_instr(vpmuldq))]
1463pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1464    unsafe {
1465        let mul = _mm_mul_epi32(a, b).as_i64x2();
1466        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1467    }
1468}
1469
1470/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1476#[cfg_attr(test, assert_instr(vpmuldq))]
1477pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1478    unsafe {
1479        let mul = _mm_mul_epi32(a, b).as_i64x2();
1480        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
1481    }
1482}
1483
1484/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1485///
1486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
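///
/// A minimal usage sketch showing the truncation to the low 32 bits, assuming
/// runtime feature detection via `std` and the `_mm512_set1_epi32` and
/// `_mm512_storeu_si512` helpers:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support has been verified at runtime.
///     unsafe {
///         let a = _mm512_set1_epi32(65_536); // 2^16
///         let b = _mm512_set1_epi32(65_537); // 2^16 + 1
///         let r = _mm512_mullo_epi32(a, b);
///         let mut out = [0i32; 16];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         // The full product 2^32 + 2^16 is truncated to its low 32 bits.
///         assert_eq!(out, [65_536i32; 16]);
///     }
/// }
/// # }
/// ```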
1487#[inline]
1488#[target_feature(enable = "avx512f")]
1489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1490#[cfg_attr(test, assert_instr(vpmulld))]
1491pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1492    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
1493}
1494
1495/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1496///
1497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1498#[inline]
1499#[target_feature(enable = "avx512f")]
1500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1501#[cfg_attr(test, assert_instr(vpmulld))]
1502pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1503    unsafe {
1504        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1505        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1506    }
1507}
1508
1509/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1510///
1511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1512#[inline]
1513#[target_feature(enable = "avx512f")]
1514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1515#[cfg_attr(test, assert_instr(vpmulld))]
1516pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1517    unsafe {
1518        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1519        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
1520    }
1521}
1522
1523/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1524///
1525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1526#[inline]
1527#[target_feature(enable = "avx512f,avx512vl")]
1528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1529#[cfg_attr(test, assert_instr(vpmulld))]
1530pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1531    unsafe {
1532        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1533        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1534    }
1535}
1536
1537/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1538///
1539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1540#[inline]
1541#[target_feature(enable = "avx512f,avx512vl")]
1542#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1543#[cfg_attr(test, assert_instr(vpmulld))]
1544pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1545    unsafe {
1546        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1547        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
1548    }
1549}
1550
1551/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1552///
1553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1554#[inline]
1555#[target_feature(enable = "avx512f,avx512vl")]
1556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1557#[cfg_attr(test, assert_instr(vpmulld))]
1558pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1559    unsafe {
1560        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1561        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1562    }
1563}
1564
1565/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1566///
1567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1568#[inline]
1569#[target_feature(enable = "avx512f,avx512vl")]
1570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1571#[cfg_attr(test, assert_instr(vpmulld))]
1572pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1573    unsafe {
1574        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1575        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
1576    }
1577}
1578
1579/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1580///
1581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1582///
1583/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
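///
/// A minimal usage sketch, assuming runtime feature detection via `std` and the
/// `_mm512_set1_epi64` and `_mm512_storeu_si512` helpers:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support has been verified at runtime.
///     unsafe {
///         let a = _mm512_set1_epi64(1 << 33);
///         let b = _mm512_set1_epi64(3);
///         let r = _mm512_mullox_epi64(a, b);
///         let mut out = [0i64; 8];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [3i64 << 33; 8]);
///     }
/// }
/// # }
/// ```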
1584#[inline]
1585#[target_feature(enable = "avx512f")]
1586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1587pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1588    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
1589}
1590
1591/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1594///
1595/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1596#[inline]
1597#[target_feature(enable = "avx512f")]
1598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1599pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1600    unsafe {
1601        let mul = _mm512_mullox_epi64(a, b).as_i64x8();
1602        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1603    }
1604}
1605
1606/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1607///
1608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
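///
/// A minimal sketch of the unsigned low-dword semantics, assuming runtime feature
/// detection via `std` and the `_mm512_set1_epi64` and `_mm512_storeu_si512` helpers:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support has been verified at runtime.
///     unsafe {
///         // The low 32 bits of every lane are 0xFFFF_FFFF, read as 4_294_967_295 (unsigned).
///         let a = _mm512_set1_epi64(0xFFFF_FFFF);
///         let b = _mm512_set1_epi64(2);
///         let r = _mm512_mul_epu32(a, b);
///         let mut out = [0u64; 8];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [0x1_FFFF_FFFEu64; 8]);
///     }
/// }
/// # }
/// ```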
1609#[inline]
1610#[target_feature(enable = "avx512f")]
1611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1612#[cfg_attr(test, assert_instr(vpmuludq))]
1613pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1614    unsafe {
1615        let a = a.as_u64x8();
1616        let b = b.as_u64x8();
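        // Clear the upper 32 bits of every lane so the multiply below uses only the
        // zero-extended low unsigned dwords of `a` and `b`.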
1617        let mask = u64x8::splat(u32::MAX.into());
1618        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
1619    }
1620}
1621
1622/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1625#[inline]
1626#[target_feature(enable = "avx512f")]
1627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1628#[cfg_attr(test, assert_instr(vpmuludq))]
1629pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1630    unsafe {
1631        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1632        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1633    }
1634}
1635
1636/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1637///
1638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1639#[inline]
1640#[target_feature(enable = "avx512f")]
1641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1642#[cfg_attr(test, assert_instr(vpmuludq))]
1643pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1644    unsafe {
1645        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1646        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
1647    }
1648}
1649
1650/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1653#[inline]
1654#[target_feature(enable = "avx512f,avx512vl")]
1655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1656#[cfg_attr(test, assert_instr(vpmuludq))]
1657pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1658    unsafe {
1659        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1660        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
1661    }
1662}
1663
1664/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1665///
1666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1667#[inline]
1668#[target_feature(enable = "avx512f,avx512vl")]
1669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1670#[cfg_attr(test, assert_instr(vpmuludq))]
1671pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1672    unsafe {
1673        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1674        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
1675    }
1676}
1677
1678/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1679///
1680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1681#[inline]
1682#[target_feature(enable = "avx512f,avx512vl")]
1683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1684#[cfg_attr(test, assert_instr(vpmuludq))]
1685pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1686    unsafe {
1687        let mul = _mm_mul_epu32(a, b).as_u64x2();
1688        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
1689    }
1690}
1691
1692/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1693///
1694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1695#[inline]
1696#[target_feature(enable = "avx512f,avx512vl")]
1697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1698#[cfg_attr(test, assert_instr(vpmuludq))]
1699pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1700    unsafe {
1701        let mul = _mm_mul_epu32(a, b).as_u64x2();
1702        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
1703    }
1704}
1705
1706/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1707///
1708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
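///
/// A minimal usage sketch, assuming runtime feature detection via `std` and the
/// `_mm512_set1_ps` and `_mm512_storeu_ps` helpers for setup and inspection:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support has been verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(1.5);
///         let b = _mm512_set1_ps(4.0);
///         let r = _mm512_mul_ps(a, b);
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out, [6.0f32; 16]);
///     }
/// }
/// # }
/// ```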
1709#[inline]
1710#[target_feature(enable = "avx512f")]
1711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1712#[cfg_attr(test, assert_instr(vmulps))]
1713pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1714    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
1715}
1716
1717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1718///
1719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1720#[inline]
1721#[target_feature(enable = "avx512f")]
1722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1723#[cfg_attr(test, assert_instr(vmulps))]
1724pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1725    unsafe {
1726        let mul = _mm512_mul_ps(a, b).as_f32x16();
1727        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
1728    }
1729}
1730
1731/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1732///
1733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1734#[inline]
1735#[target_feature(enable = "avx512f")]
1736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1737#[cfg_attr(test, assert_instr(vmulps))]
1738pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1739    unsafe {
1740        let mul = _mm512_mul_ps(a, b).as_f32x16();
1741        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
1742    }
1743}
1744
1745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1746///
1747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1748#[inline]
1749#[target_feature(enable = "avx512f,avx512vl")]
1750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1751#[cfg_attr(test, assert_instr(vmulps))]
1752pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1753    unsafe {
1754        let mul = _mm256_mul_ps(a, b).as_f32x8();
1755        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
1756    }
1757}
1758
1759/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1760///
1761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1762#[inline]
1763#[target_feature(enable = "avx512f,avx512vl")]
1764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1765#[cfg_attr(test, assert_instr(vmulps))]
1766pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1767    unsafe {
1768        let mul = _mm256_mul_ps(a, b).as_f32x8();
1769        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
1770    }
1771}
1772
1773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1774///
1775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1776#[inline]
1777#[target_feature(enable = "avx512f,avx512vl")]
1778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1779#[cfg_attr(test, assert_instr(vmulps))]
1780pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1781    unsafe {
1782        let mul = _mm_mul_ps(a, b).as_f32x4();
1783        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
1784    }
1785}
1786
1787/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1790#[inline]
1791#[target_feature(enable = "avx512f,avx512vl")]
1792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1793#[cfg_attr(test, assert_instr(vmulps))]
1794pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1795    unsafe {
1796        let mul = _mm_mul_ps(a, b).as_f32x4();
1797        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
1798    }
1799}
1800
1801/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1802///
1803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1804#[inline]
1805#[target_feature(enable = "avx512f")]
1806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1807#[cfg_attr(test, assert_instr(vmulpd))]
1808pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1809    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
1810}
1811
1812/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1813///
1814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1815#[inline]
1816#[target_feature(enable = "avx512f")]
1817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1818#[cfg_attr(test, assert_instr(vmulpd))]
1819pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1820    unsafe {
1821        let mul = _mm512_mul_pd(a, b).as_f64x8();
1822        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
1823    }
1824}
1825
1826/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1827///
1828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1829#[inline]
1830#[target_feature(enable = "avx512f")]
1831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1832#[cfg_attr(test, assert_instr(vmulpd))]
1833pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1834    unsafe {
1835        let mul = _mm512_mul_pd(a, b).as_f64x8();
1836        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
1837    }
1838}
1839
1840/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1841///
1842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1843#[inline]
1844#[target_feature(enable = "avx512f,avx512vl")]
1845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1846#[cfg_attr(test, assert_instr(vmulpd))]
1847pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1848    unsafe {
1849        let mul = _mm256_mul_pd(a, b).as_f64x4();
1850        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
1851    }
1852}
1853
1854/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1855///
1856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
1857#[inline]
1858#[target_feature(enable = "avx512f,avx512vl")]
1859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1860#[cfg_attr(test, assert_instr(vmulpd))]
1861pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1862    unsafe {
1863        let mul = _mm256_mul_pd(a, b).as_f64x4();
1864        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
1865    }
1866}
1867
1868/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1869///
1870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
1871#[inline]
1872#[target_feature(enable = "avx512f,avx512vl")]
1873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1874#[cfg_attr(test, assert_instr(vmulpd))]
1875pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1876    unsafe {
1877        let mul = _mm_mul_pd(a, b).as_f64x2();
1878        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
1879    }
1880}
1881
1882/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1883///
1884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
1885#[inline]
1886#[target_feature(enable = "avx512f,avx512vl")]
1887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1888#[cfg_attr(test, assert_instr(vmulpd))]
1889pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1890    unsafe {
1891        let mul = _mm_mul_pd(a, b).as_f64x2();
1892        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
1893    }
1894}
1895
1896/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
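///
/// A minimal usage sketch, assuming runtime feature detection via `std` and the
/// `_mm512_set1_ps` and `_mm512_storeu_ps` helpers for setup and inspection:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support has been verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(10.0);
///         let b = _mm512_set1_ps(4.0);
///         let r = _mm512_div_ps(a, b);
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out, [2.5f32; 16]);
///     }
/// }
/// # }
/// ```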
1899#[inline]
1900#[target_feature(enable = "avx512f")]
1901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1902#[cfg_attr(test, assert_instr(vdivps))]
1903pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
1904    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
1905}
1906
1907/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1908///
1909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
1910#[inline]
1911#[target_feature(enable = "avx512f")]
1912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1913#[cfg_attr(test, assert_instr(vdivps))]
1914pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1915    unsafe {
1916        let div = _mm512_div_ps(a, b).as_f32x16();
1917        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
1918    }
1919}
1920
1921/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1922///
1923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
1924#[inline]
1925#[target_feature(enable = "avx512f")]
1926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1927#[cfg_attr(test, assert_instr(vdivps))]
1928pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1929    unsafe {
1930        let div = _mm512_div_ps(a, b).as_f32x16();
1931        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
1932    }
1933}
1934
1935/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1936///
1937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
1938#[inline]
1939#[target_feature(enable = "avx512f,avx512vl")]
1940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1941#[cfg_attr(test, assert_instr(vdivps))]
1942pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1943    unsafe {
1944        let div = _mm256_div_ps(a, b).as_f32x8();
1945        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
1946    }
1947}
1948
1949/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1950///
1951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
1952#[inline]
1953#[target_feature(enable = "avx512f,avx512vl")]
1954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1955#[cfg_attr(test, assert_instr(vdivps))]
1956pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1957    unsafe {
1958        let div = _mm256_div_ps(a, b).as_f32x8();
1959        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
1960    }
1961}
1962
1963/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1964///
1965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
1966#[inline]
1967#[target_feature(enable = "avx512f,avx512vl")]
1968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1969#[cfg_attr(test, assert_instr(vdivps))]
1970pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1971    unsafe {
1972        let div = _mm_div_ps(a, b).as_f32x4();
1973        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
1974    }
1975}
1976
1977/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1978///
1979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
1980#[inline]
1981#[target_feature(enable = "avx512f,avx512vl")]
1982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1983#[cfg_attr(test, assert_instr(vdivps))]
1984pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1985    unsafe {
1986        let div = _mm_div_ps(a, b).as_f32x4();
1987        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
1988    }
1989}
1990
1991/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1992///
1993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
1994#[inline]
1995#[target_feature(enable = "avx512f")]
1996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1997#[cfg_attr(test, assert_instr(vdivpd))]
1998pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
1999    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
2000}
2001
2002/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2003///
2004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
2005#[inline]
2006#[target_feature(enable = "avx512f")]
2007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2008#[cfg_attr(test, assert_instr(vdivpd))]
2009pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2010    unsafe {
2011        let div = _mm512_div_pd(a, b).as_f64x8();
2012        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
2013    }
2014}
2015
2016/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2017///
2018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
2019#[inline]
2020#[target_feature(enable = "avx512f")]
2021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2022#[cfg_attr(test, assert_instr(vdivpd))]
2023pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2024    unsafe {
2025        let div = _mm512_div_pd(a, b).as_f64x8();
2026        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
2027    }
2028}
2029
2030/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2031///
2032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
2033#[inline]
2034#[target_feature(enable = "avx512f,avx512vl")]
2035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2036#[cfg_attr(test, assert_instr(vdivpd))]
2037pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2038    unsafe {
2039        let div = _mm256_div_pd(a, b).as_f64x4();
2040        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
2041    }
2042}
2043
2044/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2045///
2046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
2047#[inline]
2048#[target_feature(enable = "avx512f,avx512vl")]
2049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2050#[cfg_attr(test, assert_instr(vdivpd))]
2051pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2052    unsafe {
2053        let div = _mm256_div_pd(a, b).as_f64x4();
2054        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
2055    }
2056}
2057
2058/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2059///
2060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
2061#[inline]
2062#[target_feature(enable = "avx512f,avx512vl")]
2063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2064#[cfg_attr(test, assert_instr(vdivpd))]
2065pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2066    unsafe {
2067        let div = _mm_div_pd(a, b).as_f64x2();
2068        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
2069    }
2070}
2071
2072/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2073///
2074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
2075#[inline]
2076#[target_feature(enable = "avx512f,avx512vl")]
2077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2078#[cfg_attr(test, assert_instr(vdivpd))]
2079pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2080    unsafe {
2081        let div = _mm_div_pd(a, b).as_f64x2();
2082        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
2083    }
2084}
2085
2086/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2087///
2088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
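///
/// A minimal usage sketch showing the signed comparison, assuming runtime feature
/// detection via `std` and the `_mm512_set1_epi32` and `_mm512_storeu_si512` helpers:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support has been verified at runtime.
///     unsafe {
///         let a = _mm512_set1_epi32(-7);
///         let b = _mm512_set1_epi32(3);
///         let r = _mm512_max_epi32(a, b);
///         let mut out = [0i32; 16];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         // The comparison is signed, so 3 wins over -7 in every lane.
///         assert_eq!(out, [3i32; 16]);
///     }
/// }
/// # }
/// ```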
2089#[inline]
2090#[target_feature(enable = "avx512f")]
2091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2092#[cfg_attr(test, assert_instr(vpmaxsd))]
2093pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2094    unsafe {
2095        let a = a.as_i32x16();
2096        let b = b.as_i32x16();
2097        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2098    }
2099}
2100
2101/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2102///
2103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
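///
/// # Examples
///
/// A minimal sketch (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-7);
/// let b = _mm512_set1_epi64(3);
/// let r = _mm512_max_epi64(a, b); // every 64-bit lane of `r` is 3
/// ```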
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, max, src.as_i64x2()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, max, i64x2::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
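///
/// # Examples
///
/// A minimal sketch (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.5);
/// let r = _mm512_max_ps(a, b); // every f32 lane of `r` is 2.5
/// ```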
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vmaxps(
            a.as_f32x16(),
            b.as_f32x16(),
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let max = _mm512_max_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, max, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let max = _mm512_max_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, max, f32x16::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let max = _mm256_max_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, max, src.as_f32x8()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let max = _mm256_max_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, max, f32x8::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let max = _mm_max_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, max, src.as_f32x4()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let max = _mm_max_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, max, f32x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
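///
/// # Examples
///
/// A minimal sketch (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_pd(-0.5);
/// let b = _mm512_set1_pd(0.25);
/// let r = _mm512_max_pd(a, b); // every f64 lane of `r` is 0.25
/// ```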
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let max = _mm512_max_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, max, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let max = _mm512_max_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, max, f64x8::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let max = _mm256_max_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, max, src.as_f64x4()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let max = _mm256_max_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, max, f64x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let max = _mm_max_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let max = _mm_max_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
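///
/// # Examples
///
/// A minimal sketch contrasting the unsigned comparison with its signed
/// counterpart (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1); // all bits set, i.e. u32::MAX in each lane
/// let b = _mm512_set1_epi32(1);
/// // Unsigned maximum: every lane of `r` keeps the u32::MAX bit pattern,
/// // whereas `_mm512_max_epi32` would have picked 1.
/// let r = _mm512_max_epu32(a, b);
/// ```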
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u32x16();
        let b = b.as_u32x16();
        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, max, src.as_u32x16()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, max, u32x16::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, max, src.as_u32x8()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, max, u32x8::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
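///
/// # Examples
///
/// A minimal sketch (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // all bits set, i.e. u64::MAX in each lane
/// let b = _mm512_set1_epi64(1);
/// // Unsigned maximum: every lane of `r` keeps the u64::MAX bit pattern.
/// let r = _mm512_max_epu64(a, b);
/// ```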
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
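///
/// # Examples
///
/// A minimal sketch (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-7);
/// let b = _mm512_set1_epi32(3);
/// let r = _mm512_min_epi32(a, b); // every 32-bit lane of `r` is -7
/// ```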
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, min, src.as_i32x16()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, min, i32x16::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, min, src.as_i32x8()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, min, i32x8::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
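///
/// # Examples
///
/// A minimal sketch (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-7);
/// let b = _mm512_set1_epi64(3);
/// let r = _mm512_min_epi64(a, b); // every 64-bit lane of `r` is -7
/// ```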
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, min, src.as_i64x8()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, min, i64x8::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, min, src.as_i64x4()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, min, i64x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, min, src.as_i64x2()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, min, i64x2::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
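///
/// # Examples
///
/// A minimal sketch (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.5);
/// let r = _mm512_min_ps(a, b); // every f32 lane of `r` is 1.0
/// ```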
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vminps(
            a.as_f32x16(),
            b.as_f32x16(),
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let min = _mm512_min_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, min, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let min = _mm512_min_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, min, f32x16::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let min = _mm256_min_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, min, src.as_f32x8()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let min = _mm256_min_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, min, f32x8::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let min = _mm_min_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, min, src.as_f32x4()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let min = _mm_min_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, min, f32x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
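///
/// # Examples
///
/// A minimal sketch (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_pd(-0.5);
/// let b = _mm512_set1_pd(0.25);
/// let r = _mm512_min_pd(a, b); // every f64 lane of `r` is -0.5
/// ```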
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let min = _mm512_min_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, min, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let min = _mm512_min_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, min, f64x8::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let min = _mm256_min_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, min, src.as_f64x4()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let min = _mm256_min_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, min, f64x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let min = _mm_min_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, min, src.as_f64x2()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let min = _mm_min_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, min, f64x2::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
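///
/// # Examples
///
/// A minimal sketch contrasting the unsigned comparison with its signed
/// counterpart (assumes `core::arch::x86_64::*` is in scope and AVX-512F
/// support has already been verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1); // all bits set, i.e. u32::MAX in each lane
/// let b = _mm512_set1_epi32(1);
/// // Unsigned minimum: every lane of `r` is 1, whereas `_mm512_min_epi32`
/// // would have picked -1.
/// let r = _mm512_min_epu32(a, b);
/// ```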
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u32x16();
        let b = b.as_u32x16();
        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, min, src.as_u32x16()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, min, u32x16::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3264#[inline]
3265#[target_feature(enable = "avx512f")]
3266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3267#[cfg_attr(test, assert_instr(vpminuq))]
3268pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3269    unsafe {
3270        let a = a.as_u64x8();
3271        let b = b.as_u64x8();
3272        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
3273    }
3274}
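
// Illustrative sketch (not part of the crate): the comparison is unsigned, so a lane
// holding -1 (all bits set) is treated as u64::MAX rather than as the smallest value:
//
//     let a = _mm512_set1_epi64(-1); // u64::MAX in every lane when viewed unsigned
//     let b = _mm512_set1_epi64(5);
//     let r = _mm512_min_epu64(a, b); // every lane holds 5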
3275
3276/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3277///
3278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3279#[inline]
3280#[target_feature(enable = "avx512f")]
3281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3282#[cfg_attr(test, assert_instr(vpminuq))]
3283pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3284    unsafe {
3285        let min = _mm512_min_epu64(a, b).as_u64x8();
3286        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3287    }
3288}
3289
3290/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3291///
3292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3293#[inline]
3294#[target_feature(enable = "avx512f")]
3295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3296#[cfg_attr(test, assert_instr(vpminuq))]
3297pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3298    unsafe {
3299        let min = _mm512_min_epu64(a, b).as_u64x8();
3300        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3301    }
3302}
3303
3304/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3305///
3306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3307#[inline]
3308#[target_feature(enable = "avx512f,avx512vl")]
3309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3310#[cfg_attr(test, assert_instr(vpminuq))]
3311pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3312    unsafe {
3313        let a = a.as_u64x4();
3314        let b = b.as_u64x4();
3315        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
3316    }
3317}
3318
3319/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3320///
3321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3322#[inline]
3323#[target_feature(enable = "avx512f,avx512vl")]
3324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3325#[cfg_attr(test, assert_instr(vpminuq))]
3326pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3327    unsafe {
3328        let min = _mm256_min_epu64(a, b).as_u64x4();
3329        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3330    }
3331}
3332
3333/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3336#[inline]
3337#[target_feature(enable = "avx512f,avx512vl")]
3338#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3339#[cfg_attr(test, assert_instr(vpminuq))]
3340pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3341    unsafe {
3342        let min = _mm256_min_epu64(a, b).as_u64x4();
3343        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3344    }
3345}
3346
3347/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3348///
3349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3350#[inline]
3351#[target_feature(enable = "avx512f,avx512vl")]
3352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3353#[cfg_attr(test, assert_instr(vpminuq))]
3354pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3355    unsafe {
3356        let a = a.as_u64x2();
3357        let b = b.as_u64x2();
3358        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
3359    }
3360}
3361
3362/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3368#[cfg_attr(test, assert_instr(vpminuq))]
3369pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3370    unsafe {
3371        let min = _mm_min_epu64(a, b).as_u64x2();
3372        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3373    }
3374}
3375
3376/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3377///
3378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3379#[inline]
3380#[target_feature(enable = "avx512f,avx512vl")]
3381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3382#[cfg_attr(test, assert_instr(vpminuq))]
3383pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3384    unsafe {
3385        let min = _mm_min_epu64(a, b).as_u64x2();
3386        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3387    }
3388}
3389
3390/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
3393#[inline]
3394#[target_feature(enable = "avx512f")]
3395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3396#[cfg_attr(test, assert_instr(vsqrtps))]
3397pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3398    unsafe { simd_fsqrt(a) }
3399}
3400
3401/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3402///
3403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3404#[inline]
3405#[target_feature(enable = "avx512f")]
3406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3407#[cfg_attr(test, assert_instr(vsqrtps))]
3408pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3409    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3410}
3411
3412/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3413///
3414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3415#[inline]
3416#[target_feature(enable = "avx512f")]
3417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3418#[cfg_attr(test, assert_instr(vsqrtps))]
3419pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3420    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3421}
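
// Illustrative sketch (not part of the crate); the inputs and mask are arbitrary. The
// write- and zero-masked forms differ only in what unselected lanes receive: `src` for
// `_mm512_mask_sqrt_ps`, zero for `_mm512_maskz_sqrt_ps`:
//
//     let a = _mm512_set1_ps(4.0);
//     let src = _mm512_set1_ps(-1.0);
//     let m = _mm512_mask_sqrt_ps(src, 0x00FF, a); // lanes 0..8 = 2.0, lanes 8..16 = -1.0
//     let z = _mm512_maskz_sqrt_ps(0x00FF, a);     // lanes 0..8 = 2.0, lanes 8..16 = 0.0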
3422
3423/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3426#[inline]
3427#[target_feature(enable = "avx512f,avx512vl")]
3428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3429#[cfg_attr(test, assert_instr(vsqrtps))]
3430pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3431    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3432}
3433
3434/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3435///
3436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3437#[inline]
3438#[target_feature(enable = "avx512f,avx512vl")]
3439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3440#[cfg_attr(test, assert_instr(vsqrtps))]
3441pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3442    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3443}
3444
3445/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3448#[inline]
3449#[target_feature(enable = "avx512f,avx512vl")]
3450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3451#[cfg_attr(test, assert_instr(vsqrtps))]
3452pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3453    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3454}
3455
3456/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3457///
3458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3459#[inline]
3460#[target_feature(enable = "avx512f,avx512vl")]
3461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3462#[cfg_attr(test, assert_instr(vsqrtps))]
3463pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3464    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3465}
3466
3467/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3470#[inline]
3471#[target_feature(enable = "avx512f")]
3472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3473#[cfg_attr(test, assert_instr(vsqrtpd))]
3474pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3475    unsafe { simd_fsqrt(a) }
3476}
3477
3478/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3481#[inline]
3482#[target_feature(enable = "avx512f")]
3483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3484#[cfg_attr(test, assert_instr(vsqrtpd))]
3485pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3486    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3487}
3488
3489/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3490///
3491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3492#[inline]
3493#[target_feature(enable = "avx512f")]
3494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3495#[cfg_attr(test, assert_instr(vsqrtpd))]
3496pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3497    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3498}
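
// Illustrative sketch (not part of the crate): callers that cannot guarantee `avx512f`
// at compile time typically guard these calls with runtime detection; exact details
// depend on the caller's own `target_feature` setup:
//
//     if is_x86_feature_detected!("avx512f") {
//         // SAFETY: the required CPU feature was just detected.
//         let r = unsafe { _mm512_maskz_sqrt_pd(0b0000_1111, _mm512_set1_pd(9.0)) };
//     }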
3499
3500/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3501///
3502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3503#[inline]
3504#[target_feature(enable = "avx512f,avx512vl")]
3505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3506#[cfg_attr(test, assert_instr(vsqrtpd))]
3507pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3508    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3509}
3510
3511/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3512///
3513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3514#[inline]
3515#[target_feature(enable = "avx512f,avx512vl")]
3516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3517#[cfg_attr(test, assert_instr(vsqrtpd))]
3518pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3519    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3520}
3521
3522/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3525#[inline]
3526#[target_feature(enable = "avx512f,avx512vl")]
3527#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3528#[cfg_attr(test, assert_instr(vsqrtpd))]
3529pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3530    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3531}
3532
3533/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3534///
3535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3536#[inline]
3537#[target_feature(enable = "avx512f,avx512vl")]
3538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3539#[cfg_attr(test, assert_instr(vsqrtpd))]
3540pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3541    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3542}
3543
3544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3545///
3546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
3547#[inline]
3548#[target_feature(enable = "avx512f")]
3549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3550#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3551pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3552    unsafe { simd_fma(a, b, c) }
3553}
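
// Illustrative sketch (not part of the crate): the multiply-add is fused, i.e. each
// lane computes `a * b + c` with a single rounding:
//
//     let a = _mm512_set1_ps(2.0);
//     let b = _mm512_set1_ps(3.0);
//     let c = _mm512_set1_ps(1.0);
//     let r = _mm512_fmadd_ps(a, b, c); // every lane holds 2.0 * 3.0 + 1.0 == 7.0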
3554
3555/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3558#[inline]
3559#[target_feature(enable = "avx512f")]
3560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3561#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3562pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3563    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3564}
3565
3566/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3567///
3568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3569#[inline]
3570#[target_feature(enable = "avx512f")]
3571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3572#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3573pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3574    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3575}
3576
3577/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3578///
3579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3580#[inline]
3581#[target_feature(enable = "avx512f")]
3582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3583#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3584pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3585    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3586}
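
// Illustrative sketch (not part of the crate); the mask is arbitrary. The three masked
// forms differ only in the fallback for lanes whose mask bit is clear: `a` for
// `_mm512_mask_fmadd_ps`, zero for `_mm512_maskz_fmadd_ps`, and `c` for
// `_mm512_mask3_fmadd_ps`:
//
//     let a = _mm512_set1_ps(2.0);
//     let b = _mm512_set1_ps(3.0);
//     let c = _mm512_set1_ps(1.0);
//     // Even lanes hold 2.0 * 3.0 + 1.0 == 7.0; odd lanes fall back to `c`, i.e. 1.0.
//     let r = _mm512_mask3_fmadd_ps(a, b, c, 0b0101_0101_0101_0101);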
3587
3588/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3589///
3590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3591#[inline]
3592#[target_feature(enable = "avx512f,avx512vl")]
3593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3594#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3595pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3596    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3597}
3598
3599/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3600///
3601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3602#[inline]
3603#[target_feature(enable = "avx512f,avx512vl")]
3604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3605#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3606pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3607    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3608}
3609
3610/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3616#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3617pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3618    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3619}
3620
3621/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3624#[inline]
3625#[target_feature(enable = "avx512f,avx512vl")]
3626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3627#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3628pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3629    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3630}
3631
3632/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3635#[inline]
3636#[target_feature(enable = "avx512f,avx512vl")]
3637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3638#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3639pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3640    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3641}
3642
3643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3644///
3645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3646#[inline]
3647#[target_feature(enable = "avx512f,avx512vl")]
3648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3649#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3650pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3651    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3652}
3653
3654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3655///
3656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3657#[inline]
3658#[target_feature(enable = "avx512f")]
3659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3660#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3661pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3662    unsafe { simd_fma(a, b, c) }
3663}
3664
3665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3668#[inline]
3669#[target_feature(enable = "avx512f")]
3670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3671#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3672pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3673    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3674}
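
// Illustrative sketch (not part of the crate): the double-precision form takes an
// 8-bit mask, one bit per 64-bit lane, and clear lanes keep the value from `a`:
//
//     let a = _mm512_set1_pd(2.0);
//     let b = _mm512_set1_pd(3.0);
//     let c = _mm512_set1_pd(1.0);
//     // Lanes 0..4 hold 7.0; lanes 4..8 keep the 2.0 copied from `a`.
//     let r = _mm512_mask_fmadd_pd(a, 0b0000_1111, b, c);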
3675
3676/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3677///
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3679#[inline]
3680#[target_feature(enable = "avx512f")]
3681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3682#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3683pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3684    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3685}
3686
3687/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3693#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3694pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3695    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3696}
3697
3698/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3699///
3700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3701#[inline]
3702#[target_feature(enable = "avx512f,avx512vl")]
3703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3704#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3705pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3706    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3707}
3708
3709/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3710///
3711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3712#[inline]
3713#[target_feature(enable = "avx512f,avx512vl")]
3714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3715#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3716pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3717    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3718}
3719
3720/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3721///
3722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3723#[inline]
3724#[target_feature(enable = "avx512f,avx512vl")]
3725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3726#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3727pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3728    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3729}
3730
3731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3732///
3733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3734#[inline]
3735#[target_feature(enable = "avx512f,avx512vl")]
3736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3737#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3738pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3739    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3740}
3741
3742/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3743///
3744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3745#[inline]
3746#[target_feature(enable = "avx512f,avx512vl")]
3747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3748#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3749pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3750    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3751}
3752
3753/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3760pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3761    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3762}
3763
3764/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3765///
3766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3770#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3771pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3772    unsafe { simd_fma(a, b, simd_neg(c)) }
3773}
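
// Illustrative sketch (not part of the crate): `fmsub` negates `c` before the fused
// multiply-add, so each lane computes `a * b - c` with a single rounding:
//
//     let a = _mm512_set1_ps(2.0);
//     let b = _mm512_set1_ps(3.0);
//     let c = _mm512_set1_ps(1.0);
//     let r = _mm512_fmsub_ps(a, b, c); // every lane holds 2.0 * 3.0 - 1.0 == 5.0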
3774
3775/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3776///
3777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3778#[inline]
3779#[target_feature(enable = "avx512f")]
3780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3781#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3782pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3783    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3784}
3785
3786/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3787///
3788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3789#[inline]
3790#[target_feature(enable = "avx512f")]
3791#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3792#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3793pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3794    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3795}
3796
3797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3798///
3799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3800#[inline]
3801#[target_feature(enable = "avx512f")]
3802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3803#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3804pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3805    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3806}
3807
3808/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3809///
3810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3811#[inline]
3812#[target_feature(enable = "avx512f,avx512vl")]
3813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3814#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3815pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3816    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
3817}
3818
3819/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3822#[inline]
3823#[target_feature(enable = "avx512f,avx512vl")]
3824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3825#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3826pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3827    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
3828}
3829
3830/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3831///
3832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3833#[inline]
3834#[target_feature(enable = "avx512f,avx512vl")]
3835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3836#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3837pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3838    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
3839}
3840
3841/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3842///
3843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3844#[inline]
3845#[target_feature(enable = "avx512f,avx512vl")]
3846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3847#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3848pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3849    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
3850}
3851
3852/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3853///
3854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3855#[inline]
3856#[target_feature(enable = "avx512f,avx512vl")]
3857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3858#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3859pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3860    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
3861}
3862
3863/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3866#[inline]
3867#[target_feature(enable = "avx512f,avx512vl")]
3868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3869#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3870pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3871    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
3872}
3873
3874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3877#[inline]
3878#[target_feature(enable = "avx512f")]
3879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3880#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3881pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3882    unsafe { simd_fma(a, b, simd_neg(c)) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3888#[inline]
3889#[target_feature(enable = "avx512f")]
3890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3891#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3892pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3893    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
3894}
3895
3896/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3899#[inline]
3900#[target_feature(enable = "avx512f")]
3901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3902#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3903pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3904    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
3905}
3906
3907/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3910#[inline]
3911#[target_feature(enable = "avx512f")]
3912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3913#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3914pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3915    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
3916}
3917
3918/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3921#[inline]
3922#[target_feature(enable = "avx512f,avx512vl")]
3923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3924#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3925pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3926    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
3927}
3928
3929/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3932#[inline]
3933#[target_feature(enable = "avx512f,avx512vl")]
3934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3935#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3936pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3937    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
3938}
3939
3940/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3943#[inline]
3944#[target_feature(enable = "avx512f,avx512vl")]
3945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3946#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3947pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3948    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
3949}
3950
3951/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3954#[inline]
3955#[target_feature(enable = "avx512f,avx512vl")]
3956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3957#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3958pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3959    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
3960}
3961
3962/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3965#[inline]
3966#[target_feature(enable = "avx512f,avx512vl")]
3967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3968#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3969pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3970    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
3971}
3972
3973/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3976#[inline]
3977#[target_feature(enable = "avx512f,avx512vl")]
3978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3979#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
3980pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3981    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
3982}
3983
3984/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternately add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
3987#[inline]
3988#[target_feature(enable = "avx512f")]
3989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3990#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3991pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3992    unsafe {
3993        let add = simd_fma(a, b, c);
3994        let sub = simd_fma(a, b, simd_neg(c));
3995        simd_shuffle!(
3996            add,
3997            sub,
3998            [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3999        )
4000    }
4001}
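
// Illustrative sketch (not part of the crate): the shuffle above interleaves the two
// fused results so even-indexed lanes take `a * b - c` and odd-indexed lanes take
// `a * b + c`:
//
//     let a = _mm512_set1_ps(2.0);
//     let b = _mm512_set1_ps(3.0);
//     let c = _mm512_set1_ps(1.0);
//     // Lanes 0, 2, 4, ... hold 5.0; lanes 1, 3, 5, ... hold 7.0.
//     let r = _mm512_fmaddsub_ps(a, b, c);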
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternately add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4011    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternately add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4022    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternately add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4033    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternately add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4042#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4043pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4044    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4045}
4046
4047/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternately add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4050#[inline]
4051#[target_feature(enable = "avx512f,avx512vl")]
4052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4053#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4054pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4055    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4056}
4057
4058/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternately add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4059///
4060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4061#[inline]
4062#[target_feature(enable = "avx512f,avx512vl")]
4063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4064#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4065pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4066    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4067}
4068
4069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternately add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4070///
4071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4072#[inline]
4073#[target_feature(enable = "avx512f,avx512vl")]
4074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4075#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4076pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4077    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4078}
4079
4080/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternately add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4081///
4082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
4083#[inline]
4084#[target_feature(enable = "avx512f,avx512vl")]
4085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4086#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4087pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4088    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
4089}
4090
4091/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4092///
4093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4094#[inline]
4095#[target_feature(enable = "avx512f,avx512vl")]
4096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4097#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4098pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4099    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4100}
4101
4102/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4103///
4104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
4105#[inline]
4106#[target_feature(enable = "avx512f")]
4107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4108#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4109pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4110    unsafe {
4111        let add = simd_fma(a, b, c);
4112        let sub = simd_fma(a, b, simd_neg(c));
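        // In the concatenation below, lanes 0..7 come from `add` (a*b + c) and
        // lanes 8..15 from `sub` (a*b - c), so the shuffle takes `sub` for even
        // result lanes and `add` for odd result lanes.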
4113        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4114    }
4115}
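
// A minimal usage sketch of the alternating pattern above; the helper name
// `fmaddsub_pd_example` is hypothetical and it assumes the caller has already
// verified AVX-512F support:
//
//     #[target_feature(enable = "avx512f")]
//     fn fmaddsub_pd_example() -> __m512d {
//         let a = _mm512_set1_pd(2.0);
//         let b = _mm512_set1_pd(3.0);
//         let c = _mm512_set1_pd(1.0);
//         // Even lanes hold 2.0 * 3.0 - 1.0 = 5.0, odd lanes 2.0 * 3.0 + 1.0 = 7.0.
//         _mm512_fmaddsub_pd(a, b, c)
//     }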
4116
4117/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4118///
4119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
4120#[inline]
4121#[target_feature(enable = "avx512f")]
4122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4123#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4124pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4125    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4126}
4127
4128/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4129///
4130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4131#[inline]
4132#[target_feature(enable = "avx512f")]
4133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4134#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4135pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4136    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4137}
4138
4139/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4140///
4141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
4142#[inline]
4143#[target_feature(enable = "avx512f")]
4144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4145#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4146pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4147    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
4148}
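
// A sketch contrasting the three masking flavours of the 512-bit
// double-precision form; `masked_fmaddsub_example` is a hypothetical helper and
// assumes AVX-512F is available:
//
//     #[target_feature(enable = "avx512f")]
//     fn masked_fmaddsub_example(a: __m512d, b: __m512d, c: __m512d) {
//         let k: __mmask8 = 0b0000_1111; // compute only the four low lanes
//         let _merge_a = _mm512_mask_fmaddsub_pd(a, k, b, c); // unselected lanes keep `a`
//         let _zeroed = _mm512_maskz_fmaddsub_pd(k, a, b, c); // unselected lanes become 0.0
//         let _merge_c = _mm512_mask3_fmaddsub_pd(a, b, c, k); // unselected lanes keep `c`
//     }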
4149
4150/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4151///
4152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4153#[inline]
4154#[target_feature(enable = "avx512f,avx512vl")]
4155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4156#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4157pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4158    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4167#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4168pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4169    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4170}
4171
4172/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4173///
4174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4175#[inline]
4176#[target_feature(enable = "avx512f,avx512vl")]
4177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4178#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4179pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4180    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
4181}
4182
4183/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4184///
4185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4186#[inline]
4187#[target_feature(enable = "avx512f,avx512vl")]
4188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4189#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4190pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4191    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
4192}
4193
4194/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4195///
4196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4197#[inline]
4198#[target_feature(enable = "avx512f,avx512vl")]
4199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4200#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4201pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4202    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
4203}
4204
4205/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4208#[inline]
4209#[target_feature(enable = "avx512f,avx512vl")]
4210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4212pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4213    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
4214}
4215
4216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4217///
4218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
4219#[inline]
4220#[target_feature(enable = "avx512f")]
4221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4222#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4223pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4224    unsafe {
4225        let add = simd_fma(a, b, c);
4226        let sub = simd_fma(a, b, simd_neg(c));
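        // `add` occupies shuffle indices 0..15 and `sub` occupies 16..31, so the
        // index pattern below selects `add` for even result lanes and `sub` for
        // odd result lanes (the mirror image of fmaddsub).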
4227        simd_shuffle!(
4228            add,
4229            sub,
4230            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4231        )
4232    }
4233}
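
// Illustrative sketch (hypothetical helper, assumes AVX-512F): fmsubadd is the
// mirror image of fmaddsub, adding in even lanes and subtracting in odd lanes.
//
//     #[target_feature(enable = "avx512f")]
//     fn fmsubadd_ps_example() -> __m512 {
//         let a = _mm512_set1_ps(2.0);
//         let b = _mm512_set1_ps(3.0);
//         let c = _mm512_set1_ps(1.0);
//         // Even lanes hold 2.0 * 3.0 + 1.0 = 7.0, odd lanes 2.0 * 3.0 - 1.0 = 5.0.
//         _mm512_fmsubadd_ps(a, b, c)
//     }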
4234
4235/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4236///
4237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4238#[inline]
4239#[target_feature(enable = "avx512f")]
4240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4241#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4242pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4243    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
4244}
4245
4246/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4247///
4248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
4249#[inline]
4250#[target_feature(enable = "avx512f")]
4251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4252#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4253pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4254    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
4255}
4256
4257/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4258///
4259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4260#[inline]
4261#[target_feature(enable = "avx512f")]
4262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4263#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4264pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4265    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
4266}
4267
4268/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4269///
4270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4271#[inline]
4272#[target_feature(enable = "avx512f,avx512vl")]
4273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4274#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4275pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4276    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
4277}
4278
4279/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4280///
4281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4282#[inline]
4283#[target_feature(enable = "avx512f,avx512vl")]
4284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4285#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4286pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4287    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
4288}
4289
4290/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4291///
4292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4293#[inline]
4294#[target_feature(enable = "avx512f,avx512vl")]
4295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4296#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4297pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4298    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4307#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4308pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4309    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
4310}
4311
4312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4313///
4314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4315#[inline]
4316#[target_feature(enable = "avx512f,avx512vl")]
4317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4318#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4319pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4320    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
4321}
4322
4323/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4324///
4325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4326#[inline]
4327#[target_feature(enable = "avx512f,avx512vl")]
4328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4329#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4330pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4331    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
4332}
4333
4334/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4335///
4336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4337#[inline]
4338#[target_feature(enable = "avx512f")]
4339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4340#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4341pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4342    unsafe {
4343        let add = simd_fma(a, b, c);
4344        let sub = simd_fma(a, b, simd_neg(c));
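        // `add` is indices 0..7 and `sub` is indices 8..15 in the concatenation,
        // so even result lanes come from `add` and odd result lanes from `sub`.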
4345        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4346    }
4347}
4348
4349/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4350///
4351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4352#[inline]
4353#[target_feature(enable = "avx512f")]
4354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4355#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4356pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4357    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
4358}
4359
4360/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4361///
4362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4363#[inline]
4364#[target_feature(enable = "avx512f")]
4365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4366#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4367pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4368    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
4369}
4370
4371/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4372///
4373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4374#[inline]
4375#[target_feature(enable = "avx512f")]
4376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4377#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4378pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4379    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
4380}
4381
4382/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4383///
4384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4385#[inline]
4386#[target_feature(enable = "avx512f,avx512vl")]
4387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4388#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4389pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4390    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
4391}
4392
4393/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4394///
4395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4396#[inline]
4397#[target_feature(enable = "avx512f,avx512vl")]
4398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4399#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4400pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4401    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
4402}
4403
4404/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4405///
4406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4407#[inline]
4408#[target_feature(enable = "avx512f,avx512vl")]
4409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4410#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4411pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4412    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
4413}
4414
4415/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4416///
4417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4418#[inline]
4419#[target_feature(enable = "avx512f,avx512vl")]
4420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4421#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4422pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4423    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
4424}
4425
4426/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4427///
4428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4429#[inline]
4430#[target_feature(enable = "avx512f,avx512vl")]
4431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4432#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4433pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4434    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
4435}
4436
4437/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4440#[inline]
4441#[target_feature(enable = "avx512f,avx512vl")]
4442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4444pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4445    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
4446}
4447
4448/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4449///
4450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
4451#[inline]
4452#[target_feature(enable = "avx512f")]
4453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4454#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4455pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4456    unsafe { simd_fma(simd_neg(a), b, c) }
4457}
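
// A small sketch of the fnmadd identity `-(a * b) + c` (hypothetical helper,
// assumes AVX-512F):
//
//     #[target_feature(enable = "avx512f")]
//     fn fnmadd_ps_example() -> __m512 {
//         let a = _mm512_set1_ps(2.0);
//         let b = _mm512_set1_ps(3.0);
//         let c = _mm512_set1_ps(10.0);
//         // Every lane holds -(2.0 * 3.0) + 10.0 = 4.0.
//         _mm512_fnmadd_ps(a, b, c)
//     }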
4458
4459/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4462#[inline]
4463#[target_feature(enable = "avx512f")]
4464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4465#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4466pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4467    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
4468}
4469
4470/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4471///
4472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4473#[inline]
4474#[target_feature(enable = "avx512f")]
4475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4476#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4477pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4478    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4487#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4488pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4489    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
4490}
4491
4492/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4495#[inline]
4496#[target_feature(enable = "avx512f,avx512vl")]
4497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4498#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4499pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4500    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
4501}
4502
4503/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4504///
4505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4506#[inline]
4507#[target_feature(enable = "avx512f,avx512vl")]
4508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4509#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4510pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4511    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
4512}
4513
4514/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4515///
4516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4517#[inline]
4518#[target_feature(enable = "avx512f,avx512vl")]
4519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4520#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4521pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4522    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
4523}
4524
4525/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4526///
4527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4528#[inline]
4529#[target_feature(enable = "avx512f,avx512vl")]
4530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4531#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4532pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4533    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
4534}
4535
4536/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4537///
4538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4539#[inline]
4540#[target_feature(enable = "avx512f,avx512vl")]
4541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4542#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4543pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4544    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
4545}
4546
4547/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4548///
4549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4550#[inline]
4551#[target_feature(enable = "avx512f,avx512vl")]
4552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4553#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4554pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4555    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
4556}
4557
4558/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4559///
4560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4561#[inline]
4562#[target_feature(enable = "avx512f")]
4563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4564#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4565pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4566    unsafe { simd_fma(simd_neg(a), b, c) }
4567}
4568
4569/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4570///
4571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4572#[inline]
4573#[target_feature(enable = "avx512f")]
4574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4575#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4576pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4577    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
4578}
4579
4580/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4581///
4582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4583#[inline]
4584#[target_feature(enable = "avx512f")]
4585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4586#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4587pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4588    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
4589}
4590
4591/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4592///
4593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4594#[inline]
4595#[target_feature(enable = "avx512f")]
4596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4597#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4598pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4599    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
4600}
4601
4602/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4603///
4604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4605#[inline]
4606#[target_feature(enable = "avx512f,avx512vl")]
4607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4608#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4609pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4610    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
4611}
4612
4613/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4614///
4615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4616#[inline]
4617#[target_feature(enable = "avx512f,avx512vl")]
4618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4619#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4620pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4621    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
4622}
4623
4624/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4625///
4626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4627#[inline]
4628#[target_feature(enable = "avx512f,avx512vl")]
4629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4630#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4631pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4632    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
4633}
4634
4635/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4636///
4637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4638#[inline]
4639#[target_feature(enable = "avx512f,avx512vl")]
4640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4641#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4642pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4643    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
4644}
4645
4646/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4647///
4648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4649#[inline]
4650#[target_feature(enable = "avx512f,avx512vl")]
4651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4652#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4653pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4654    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
4655}
4656
4657/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4658///
4659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4660#[inline]
4661#[target_feature(enable = "avx512f,avx512vl")]
4662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4663#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4664pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4665    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
4666}
4667
4668/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4669///
4670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4671#[inline]
4672#[target_feature(enable = "avx512f")]
4673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4674#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4675pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4676    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4677}
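
// A small sketch of the fnmsub identity `-(a * b) - c` (hypothetical helper,
// assumes AVX-512F):
//
//     #[target_feature(enable = "avx512f")]
//     fn fnmsub_ps_example() -> __m512 {
//         let a = _mm512_set1_ps(2.0);
//         let b = _mm512_set1_ps(3.0);
//         let c = _mm512_set1_ps(1.0);
//         // Every lane holds -(2.0 * 3.0) - 1.0 = -7.0.
//         _mm512_fnmsub_ps(a, b, c)
//     }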
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4687    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4698    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4709    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4720    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4731    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4742    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4753    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4764    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4775    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4786    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4797    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4808    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
4813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4814#[inline]
4815#[target_feature(enable = "avx512f")]
4816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4817#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4818pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4819    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
4820}
4821
4822/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4823///
4824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4825#[inline]
4826#[target_feature(enable = "avx512f,avx512vl")]
4827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4828#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4829pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4830    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4836#[inline]
4837#[target_feature(enable = "avx512f,avx512vl")]
4838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4839#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4840pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4841    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
4842}
4843
4844/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4845///
4846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4847#[inline]
4848#[target_feature(enable = "avx512f,avx512vl")]
4849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4850#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4851pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4852    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
4853}
4854
4855/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4856///
4857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4858#[inline]
4859#[target_feature(enable = "avx512f,avx512vl")]
4860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4861#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4862pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4863    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
4864}
4865
4866/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4869#[inline]
4870#[target_feature(enable = "avx512f,avx512vl")]
4871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4872#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4873pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4874    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
4875}
4876
4877/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4878///
4879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4880#[inline]
4881#[target_feature(enable = "avx512f,avx512vl")]
4882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4883#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4884pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4885    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
4886}
4887
4888/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4889///
4890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
4891#[inline]
4892#[target_feature(enable = "avx512f")]
4893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4894#[cfg_attr(test, assert_instr(vrcp14ps))]
4895pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
4896    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
4897}
4898
4899/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4900///
4901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4902#[inline]
4903#[target_feature(enable = "avx512f")]
4904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4905#[cfg_attr(test, assert_instr(vrcp14ps))]
4906pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
4907    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
4908}
4909
4910/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4911///
4912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4913#[inline]
4914#[target_feature(enable = "avx512f")]
4915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4916#[cfg_attr(test, assert_instr(vrcp14ps))]
4917pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
4918    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
4919}
4920
4921/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4922///
4923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4924#[inline]
4925#[target_feature(enable = "avx512f,avx512vl")]
4926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4927#[cfg_attr(test, assert_instr(vrcp14ps))]
4928pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
4929    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
4930}
4931
4932/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4933///
4934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4935#[inline]
4936#[target_feature(enable = "avx512f,avx512vl")]
4937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4938#[cfg_attr(test, assert_instr(vrcp14ps))]
4939pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
4940    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
4941}
4942
4943/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4944///
4945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4946#[inline]
4947#[target_feature(enable = "avx512f,avx512vl")]
4948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4949#[cfg_attr(test, assert_instr(vrcp14ps))]
4950pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
4951    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
4952}
4953
4954/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4955///
4956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
4957#[inline]
4958#[target_feature(enable = "avx512f,avx512vl")]
4959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4960#[cfg_attr(test, assert_instr(vrcp14ps))]
4961pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
4962    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
4963}
4964
4965/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4968#[inline]
4969#[target_feature(enable = "avx512f,avx512vl")]
4970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4971#[cfg_attr(test, assert_instr(vrcp14ps))]
4972pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
4973    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
4974}
4975
4976/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4977///
4978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4979#[inline]
4980#[target_feature(enable = "avx512f,avx512vl")]
4981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4982#[cfg_attr(test, assert_instr(vrcp14ps))]
4983pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
4984    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
4985}
4986
4987/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4988///
4989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
4990#[inline]
4991#[target_feature(enable = "avx512f")]
4992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4993#[cfg_attr(test, assert_instr(vrcp14pd))]
4994pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
4995    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
4996}
4997
4998/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4999///
5000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5001#[inline]
5002#[target_feature(enable = "avx512f")]
5003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5004#[cfg_attr(test, assert_instr(vrcp14pd))]
5005pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5006    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5007}
5008
5009/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5010///
5011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5012#[inline]
5013#[target_feature(enable = "avx512f")]
5014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5015#[cfg_attr(test, assert_instr(vrcp14pd))]
5016pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
5017    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5018}
5019
5020/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5023#[inline]
5024#[target_feature(enable = "avx512f,avx512vl")]
5025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5026#[cfg_attr(test, assert_instr(vrcp14pd))]
5027pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
5028    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5029}
5030
5031/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5032///
5033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5034#[inline]
5035#[target_feature(enable = "avx512f,avx512vl")]
5036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5037#[cfg_attr(test, assert_instr(vrcp14pd))]
5038pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5039    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5040}
5041
5042/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5043///
5044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5045#[inline]
5046#[target_feature(enable = "avx512f,avx512vl")]
5047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5048#[cfg_attr(test, assert_instr(vrcp14pd))]
5049pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
5050    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5051}
5052
5053/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5054///
5055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5056#[inline]
5057#[target_feature(enable = "avx512f,avx512vl")]
5058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5059#[cfg_attr(test, assert_instr(vrcp14pd))]
5060pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
5061    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5062}
5063
5064/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5065///
5066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5067#[inline]
5068#[target_feature(enable = "avx512f,avx512vl")]
5069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5070#[cfg_attr(test, assert_instr(vrcp14pd))]
5071pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5072    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5073}
5074
5075/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5076///
5077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5078#[inline]
5079#[target_feature(enable = "avx512f,avx512vl")]
5080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5081#[cfg_attr(test, assert_instr(vrcp14pd))]
5082pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
5083    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5084}
5085
5086/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5087///
5088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
5089#[inline]
5090#[target_feature(enable = "avx512f")]
5091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5092#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5093pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
5094    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5095}
5096
5097/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5103#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5104pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5105    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5106}
5107
5108/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5109///
5110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
5111#[inline]
5112#[target_feature(enable = "avx512f")]
5113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5114#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5115pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
5116    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5117}
5118
5119/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5120///
5121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5122#[inline]
5123#[target_feature(enable = "avx512f,avx512vl")]
5124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5125#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5126pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
5127    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5128}
5129
5130/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5133#[inline]
5134#[target_feature(enable = "avx512f,avx512vl")]
5135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5136#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5137pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5138    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5139}
5140
5141/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5142///
5143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5144#[inline]
5145#[target_feature(enable = "avx512f,avx512vl")]
5146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5147#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5148pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
5149    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5150}
5151
5152/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5153///
5154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5155#[inline]
5156#[target_feature(enable = "avx512f,avx512vl")]
5157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5158#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5159pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
5160    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5161}
5162
5163/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5166#[inline]
5167#[target_feature(enable = "avx512f,avx512vl")]
5168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5169#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5170pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5171    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5172}
5173
5174/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5177#[inline]
5178#[target_feature(enable = "avx512f,avx512vl")]
5179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5180#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5181pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
5182    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5183}
5184
5185/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5188#[inline]
5189#[target_feature(enable = "avx512f")]
5190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5191#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5192pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
5193    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5194}
5195
5196/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5199#[inline]
5200#[target_feature(enable = "avx512f")]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5203pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5204    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5205}
5206
5207/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5210#[inline]
5211#[target_feature(enable = "avx512f")]
5212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5213#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5214pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
5215    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5216}
5217
5218/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5221#[inline]
5222#[target_feature(enable = "avx512f,avx512vl")]
5223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5224#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5225pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
5226    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5227}
5228
5229/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5232#[inline]
5233#[target_feature(enable = "avx512f,avx512vl")]
5234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5235#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5236pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5237    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5238}
5239
5240/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5243#[inline]
5244#[target_feature(enable = "avx512f,avx512vl")]
5245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5246#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5247pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
5248    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5249}
5250
5251/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5254#[inline]
5255#[target_feature(enable = "avx512f,avx512vl")]
5256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5257#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5258pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
5259    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5260}
5261
5262/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5265#[inline]
5266#[target_feature(enable = "avx512f,avx512vl")]
5267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5268#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5269pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5270    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5271}
5272
5273/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5274///
5275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5276#[inline]
5277#[target_feature(enable = "avx512f,avx512vl")]
5278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5279#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5280pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
5281    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5282}
5283
5284/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5285///
5286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5287#[inline]
5288#[target_feature(enable = "avx512f")]
5289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5290#[cfg_attr(test, assert_instr(vgetexpps))]
5291pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5292    unsafe {
5293        transmute(vgetexpps(
5294            a.as_f32x16(),
5295            f32x16::ZERO,
5296            0b11111111_11111111,
5297            _MM_FROUND_CUR_DIRECTION,
5298        ))
5299    }
5300}
5301
5302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5303///
5304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5305#[inline]
5306#[target_feature(enable = "avx512f")]
5307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5308#[cfg_attr(test, assert_instr(vgetexpps))]
5309pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5310    unsafe {
5311        transmute(vgetexpps(
5312            a.as_f32x16(),
5313            src.as_f32x16(),
5314            k,
5315            _MM_FROUND_CUR_DIRECTION,
5316        ))
5317    }
5318}
5319
5320/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5323#[inline]
5324#[target_feature(enable = "avx512f")]
5325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5326#[cfg_attr(test, assert_instr(vgetexpps))]
5327pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5328    unsafe {
5329        transmute(vgetexpps(
5330            a.as_f32x16(),
5331            f32x16::ZERO,
5332            k,
5333            _MM_FROUND_CUR_DIRECTION,
5334        ))
5335    }
5336}
5337
5338/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5339///
5340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5341#[inline]
5342#[target_feature(enable = "avx512f,avx512vl")]
5343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5344#[cfg_attr(test, assert_instr(vgetexpps))]
5345pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
5346    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5347}
5348
5349/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5350///
5351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5352#[inline]
5353#[target_feature(enable = "avx512f,avx512vl")]
5354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5355#[cfg_attr(test, assert_instr(vgetexpps))]
5356pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5357    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
5358}
5359
5360/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5361///
5362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5363#[inline]
5364#[target_feature(enable = "avx512f,avx512vl")]
5365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5366#[cfg_attr(test, assert_instr(vgetexpps))]
5367pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5368    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5369}
5370
5371/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5372///
5373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5374#[inline]
5375#[target_feature(enable = "avx512f,avx512vl")]
5376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5377#[cfg_attr(test, assert_instr(vgetexpps))]
5378pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5379    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5380}
5381
5382/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5383///
5384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5385#[inline]
5386#[target_feature(enable = "avx512f,avx512vl")]
5387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5388#[cfg_attr(test, assert_instr(vgetexpps))]
5389pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5390    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5391}
5392
5393/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5394///
5395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5396#[inline]
5397#[target_feature(enable = "avx512f,avx512vl")]
5398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5399#[cfg_attr(test, assert_instr(vgetexpps))]
5400pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5401    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5402}
5403
5404/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5405///
5406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5407#[inline]
5408#[target_feature(enable = "avx512f")]
5409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5410#[cfg_attr(test, assert_instr(vgetexppd))]
5411pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5412    unsafe {
5413        transmute(vgetexppd(
5414            a.as_f64x8(),
5415            f64x8::ZERO,
5416            0b11111111,
5417            _MM_FROUND_CUR_DIRECTION,
5418        ))
5419    }
5420}
5421
5422/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5425#[inline]
5426#[target_feature(enable = "avx512f")]
5427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5428#[cfg_attr(test, assert_instr(vgetexppd))]
5429pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5430    unsafe {
5431        transmute(vgetexppd(
5432            a.as_f64x8(),
5433            src.as_f64x8(),
5434            k,
5435            _MM_FROUND_CUR_DIRECTION,
5436        ))
5437    }
5438}
5439
5440/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5443#[inline]
5444#[target_feature(enable = "avx512f")]
5445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5446#[cfg_attr(test, assert_instr(vgetexppd))]
5447pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5448    unsafe {
5449        transmute(vgetexppd(
5450            a.as_f64x8(),
5451            f64x8::ZERO,
5452            k,
5453            _MM_FROUND_CUR_DIRECTION,
5454        ))
5455    }
5456}
5457
5458/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5459///
5460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5461#[inline]
5462#[target_feature(enable = "avx512f,avx512vl")]
5463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5464#[cfg_attr(test, assert_instr(vgetexppd))]
5465pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5466    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5467}
5468
5469/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5470///
5471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5472#[inline]
5473#[target_feature(enable = "avx512f,avx512vl")]
5474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5475#[cfg_attr(test, assert_instr(vgetexppd))]
5476pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5477    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
5478}
5479
5480/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5481///
5482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5483#[inline]
5484#[target_feature(enable = "avx512f,avx512vl")]
5485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5486#[cfg_attr(test, assert_instr(vgetexppd))]
5487pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5488    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
5489}
5490
5491/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5492///
5493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5494#[inline]
5495#[target_feature(enable = "avx512f,avx512vl")]
5496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5497#[cfg_attr(test, assert_instr(vgetexppd))]
5498pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5499    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5500}
5501
5502/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5503///
5504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5505#[inline]
5506#[target_feature(enable = "avx512f,avx512vl")]
5507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5508#[cfg_attr(test, assert_instr(vgetexppd))]
5509pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5510    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5511}
5512
5513/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5514///
5515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5516#[inline]
5517#[target_feature(enable = "avx512f,avx512vl")]
5518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5519#[cfg_attr(test, assert_instr(vgetexppd))]
5520pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5521    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5522}
5523
5524/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5525/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5526/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5527/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5528/// * [`_MM_FROUND_TO_POS_INF`] : round up
5529/// * [`_MM_FROUND_TO_ZERO`] : truncate
5530/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5531///
5532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5533#[inline]
5534#[target_feature(enable = "avx512f")]
5535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5536#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5537#[rustc_legacy_const_generics(1)]
5538pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5539    unsafe {
5540        static_assert_uimm_bits!(IMM8, 8);
5541        let a = a.as_f32x16();
5542        let r = vrndscaleps(
5543            a,
5544            IMM8,
5545            f32x16::ZERO,
5546            0b11111111_11111111,
5547            _MM_FROUND_CUR_DIRECTION,
5548        );
5549        transmute(r)
5550    }
5551}
5552
5553/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5554/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5555/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5556/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5557/// * [`_MM_FROUND_TO_POS_INF`] : round up
5558/// * [`_MM_FROUND_TO_ZERO`] : truncate
5559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560///
5561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5562#[inline]
5563#[target_feature(enable = "avx512f")]
5564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5565#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5566#[rustc_legacy_const_generics(3)]
5567pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5568    unsafe {
5569        static_assert_uimm_bits!(IMM8, 8);
5570        let a = a.as_f32x16();
5571        let src = src.as_f32x16();
5572        let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5573        transmute(r)
5574    }
5575}
5576
5577/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5578/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5579/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5580/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5581/// * [`_MM_FROUND_TO_POS_INF`] : round up
5582/// * [`_MM_FROUND_TO_ZERO`] : truncate
5583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5589#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5590#[rustc_legacy_const_generics(2)]
5591pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5592    unsafe {
5593        static_assert_uimm_bits!(IMM8, 8);
5594        let a = a.as_f32x16();
5595        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5596        transmute(r)
5597    }
5598}
5599
5600/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5601/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5602/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5603/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5604/// * [`_MM_FROUND_TO_POS_INF`] : round up
5605/// * [`_MM_FROUND_TO_ZERO`] : truncate
5606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5607///
5608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5609#[inline]
5610#[target_feature(enable = "avx512f,avx512vl")]
5611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5612#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5613#[rustc_legacy_const_generics(1)]
5614pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5615    unsafe {
5616        static_assert_uimm_bits!(IMM8, 8);
5617        let a = a.as_f32x8();
5618        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5619        transmute(r)
5620    }
5621}
5622
5623/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5624/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5625/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5626/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5627/// * [`_MM_FROUND_TO_POS_INF`] : round up
5628/// * [`_MM_FROUND_TO_ZERO`] : truncate
5629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5630///
5631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5632#[inline]
5633#[target_feature(enable = "avx512f,avx512vl")]
5634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5635#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5636#[rustc_legacy_const_generics(3)]
5637pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638    unsafe {
5639        static_assert_uimm_bits!(IMM8, 8);
5640        let a = a.as_f32x8();
5641        let src = src.as_f32x8();
5642        let r = vrndscaleps256(a, IMM8, src, k);
5643        transmute(r)
5644    }
5645}
5646
5647/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5648/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5651/// * [`_MM_FROUND_TO_POS_INF`] : round up
5652/// * [`_MM_FROUND_TO_ZERO`] : truncate
5653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5654///
5655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5656#[inline]
5657#[target_feature(enable = "avx512f,avx512vl")]
5658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5659#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5660#[rustc_legacy_const_generics(2)]
5661pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5662    unsafe {
5663        static_assert_uimm_bits!(IMM8, 8);
5664        let a = a.as_f32x8();
5665        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5666        transmute(r)
5667    }
5668}
5669
5670/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5671/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5674/// * [`_MM_FROUND_TO_POS_INF`] : round up
5675/// * [`_MM_FROUND_TO_ZERO`] : truncate
5676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5677///
5678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5679#[inline]
5680#[target_feature(enable = "avx512f,avx512vl")]
5681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5682#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5683#[rustc_legacy_const_generics(1)]
5684pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5685    unsafe {
5686        static_assert_uimm_bits!(IMM8, 8);
5687        let a = a.as_f32x4();
5688        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5689        transmute(r)
5690    }
5691}
5692
5693/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5694/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5696/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5697/// * [`_MM_FROUND_TO_POS_INF`] : round up
5698/// * [`_MM_FROUND_TO_ZERO`] : truncate
5699/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5700///
5701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5702#[inline]
5703#[target_feature(enable = "avx512f,avx512vl")]
5704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5705#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5706#[rustc_legacy_const_generics(3)]
5707pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5708    unsafe {
5709        static_assert_uimm_bits!(IMM8, 8);
5710        let a = a.as_f32x4();
5711        let src = src.as_f32x4();
5712        let r = vrndscaleps128(a, IMM8, src, k);
5713        transmute(r)
5714    }
5715}
5716
5717/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5718/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5719/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5720/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5721/// * [`_MM_FROUND_TO_POS_INF`] : round up
5722/// * [`_MM_FROUND_TO_ZERO`] : truncate
5723/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5724///
5725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5726#[inline]
5727#[target_feature(enable = "avx512f,avx512vl")]
5728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5729#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5730#[rustc_legacy_const_generics(2)]
5731pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5732    unsafe {
5733        static_assert_uimm_bits!(IMM8, 8);
5734        let a = a.as_f32x4();
5735        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
5736        transmute(r)
5737    }
5738}
5739
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
5749#[inline]
5750#[target_feature(enable = "avx512f")]
5751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5752#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5753#[rustc_legacy_const_generics(1)]
5754pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5755    unsafe {
5756        static_assert_uimm_bits!(IMM8, 8);
5757        let a = a.as_f64x8();
5758        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
5759        transmute(r)
5760    }
5761}
5762
5763/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5764/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5765/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5766/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5767/// * [`_MM_FROUND_TO_POS_INF`] : round up
5768/// * [`_MM_FROUND_TO_ZERO`] : truncate
5769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5770///
5771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5772#[inline]
5773#[target_feature(enable = "avx512f")]
5774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5775#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5776#[rustc_legacy_const_generics(3)]
5777pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5778    src: __m512d,
5779    k: __mmask8,
5780    a: __m512d,
5781) -> __m512d {
5782    unsafe {
5783        static_assert_uimm_bits!(IMM8, 8);
5784        let a = a.as_f64x8();
5785        let src = src.as_f64x8();
5786        let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5787        transmute(r)
5788    }
5789}
5790
5791/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5792/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5794/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5795/// * [`_MM_FROUND_TO_POS_INF`] : round up
5796/// * [`_MM_FROUND_TO_ZERO`] : truncate
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5800#[inline]
5801#[target_feature(enable = "avx512f")]
5802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5803#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5804#[rustc_legacy_const_generics(2)]
5805pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5806    unsafe {
5807        static_assert_uimm_bits!(IMM8, 8);
5808        let a = a.as_f64x8();
5809        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5810        transmute(r)
5811    }
5812}
5813
5814/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5815/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5816/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5817/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5818/// * [`_MM_FROUND_TO_POS_INF`] : round up
5819/// * [`_MM_FROUND_TO_ZERO`] : truncate
5820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5821///
5822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5823#[inline]
5824#[target_feature(enable = "avx512f,avx512vl")]
5825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5826#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5827#[rustc_legacy_const_generics(1)]
5828pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5829    unsafe {
5830        static_assert_uimm_bits!(IMM8, 8);
5831        let a = a.as_f64x4();
5832        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
5833        transmute(r)
5834    }
5835}
5836
5837/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5838/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5839/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5840/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5841/// * [`_MM_FROUND_TO_POS_INF`] : round up
5842/// * [`_MM_FROUND_TO_ZERO`] : truncate
5843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5844///
5845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5846#[inline]
5847#[target_feature(enable = "avx512f,avx512vl")]
5848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5849#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5850#[rustc_legacy_const_generics(3)]
5851pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5852    src: __m256d,
5853    k: __mmask8,
5854    a: __m256d,
5855) -> __m256d {
5856    unsafe {
5857        static_assert_uimm_bits!(IMM8, 8);
5858        let a = a.as_f64x4();
5859        let src = src.as_f64x4();
5860        let r = vrndscalepd256(a, IMM8, src, k);
5861        transmute(r)
5862    }
5863}
5864
5865/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5866/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5867/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5868/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5869/// * [`_MM_FROUND_TO_POS_INF`] : round up
5870/// * [`_MM_FROUND_TO_ZERO`] : truncate
5871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872///
5873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5874#[inline]
5875#[target_feature(enable = "avx512f,avx512vl")]
5876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5877#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5878#[rustc_legacy_const_generics(2)]
5879pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5880    unsafe {
5881        static_assert_uimm_bits!(IMM8, 8);
5882        let a = a.as_f64x4();
5883        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
5884        transmute(r)
5885    }
5886}
5887
5888/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5889/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5890/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5891/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5892/// * [`_MM_FROUND_TO_POS_INF`] : round up
5893/// * [`_MM_FROUND_TO_ZERO`] : truncate
5894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5895///
5896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5897#[inline]
5898#[target_feature(enable = "avx512f,avx512vl")]
5899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5900#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5901#[rustc_legacy_const_generics(1)]
5902pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5903    unsafe {
5904        static_assert_uimm_bits!(IMM8, 8);
5905        let a = a.as_f64x2();
5906        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
5907        transmute(r)
5908    }
5909}
5910
5911/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5915/// * [`_MM_FROUND_TO_POS_INF`] : round up
5916/// * [`_MM_FROUND_TO_ZERO`] : truncate
5917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5918///
5919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5920#[inline]
5921#[target_feature(enable = "avx512f,avx512vl")]
5922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5923#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5924#[rustc_legacy_const_generics(3)]
5925pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5926    unsafe {
5927        static_assert_uimm_bits!(IMM8, 8);
5928        let a = a.as_f64x2();
5929        let src = src.as_f64x2();
5930        let r = vrndscalepd128(a, IMM8, src, k);
5931        transmute(r)
5932    }
5933}
5934
5935/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5936/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5947#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(2)]
5949pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5950    unsafe {
5951        static_assert_uimm_bits!(IMM8, 8);
5952        let a = a.as_f64x2();
5953        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
5954        transmute(r)
5955    }
5956}
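// NOTE (added commentary): the `_mask_` and `_maskz_` roundscale variants above
// differ only in what lands in lanes whose mask bit is not set: the writemask
// form copies that lane from `src`, the zeromask form writes 0.0. An
// illustrative (untested here) comparison on the 128-bit double variants:
//
//     // let a   = _mm_set_pd(1.7, 2.3);              // lanes: [2.3, 1.7]
//     // let src = _mm_set_pd(9.0, 9.0);
//     // With IMM8 = 0 (0 fraction bits, round to nearest) and k = 0b01:
//     // _mm_mask_roundscale_pd::<0>(src, 0b01, a);   // lanes: [2.0, 9.0]
//     // _mm_maskz_roundscale_pd::<0>(0b01, a);       // lanes: [2.0, 0.0]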
5957
5958/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5959///
5960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
5961#[inline]
5962#[target_feature(enable = "avx512f")]
5963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5964#[cfg_attr(test, assert_instr(vscalefps))]
5965pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5966    unsafe {
5967        transmute(vscalefps(
5968            a.as_f32x16(),
5969            b.as_f32x16(),
5970            f32x16::ZERO,
5971            0b11111111_11111111,
5972            _MM_FROUND_CUR_DIRECTION,
5973        ))
5974    }
5975}
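// NOTE (added commentary, not part of Intel's description above): `scalef`
// computes, per lane, `a * 2^floor(b)`, i.e. it scales `a` by an integral
// power of two taken from `b`. A rough scalar model, ignoring the
// NaN/infinity/denormal special cases the instruction also defines:
//
//     fn scalef_model(a: f32, b: f32) -> f32 {
//         a * b.floor().exp2()
//     }
//
// e.g. `scalef_model(3.0, 2.9)` is `3.0 * 2^2 == 12.0`.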
5976
5977/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5978///
5979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5980#[inline]
5981#[target_feature(enable = "avx512f")]
5982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5983#[cfg_attr(test, assert_instr(vscalefps))]
5984pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5985    unsafe {
5986        transmute(vscalefps(
5987            a.as_f32x16(),
5988            b.as_f32x16(),
5989            src.as_f32x16(),
5990            k,
5991            _MM_FROUND_CUR_DIRECTION,
5992        ))
5993    }
5994}
5995
5996/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5997///
5998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5999#[inline]
6000#[target_feature(enable = "avx512f")]
6001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6002#[cfg_attr(test, assert_instr(vscalefps))]
6003pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6004    unsafe {
6005        transmute(vscalefps(
6006            a.as_f32x16(),
6007            b.as_f32x16(),
6008            f32x16::ZERO,
6009            k,
6010            _MM_FROUND_CUR_DIRECTION,
6011        ))
6012    }
6013}
6014
6015/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6016///
6017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6018#[inline]
6019#[target_feature(enable = "avx512f,avx512vl")]
6020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6021#[cfg_attr(test, assert_instr(vscalefps))]
6022pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6023    unsafe {
6024        transmute(vscalefps256(
6025            a.as_f32x8(),
6026            b.as_f32x8(),
6027            f32x8::ZERO,
6028            0b11111111,
6029        ))
6030    }
6031}
6032
6033/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6034///
6035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6036#[inline]
6037#[target_feature(enable = "avx512f,avx512vl")]
6038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6039#[cfg_attr(test, assert_instr(vscalefps))]
6040pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6041    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6042}
6043
6044/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6047#[inline]
6048#[target_feature(enable = "avx512f,avx512vl")]
6049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6050#[cfg_attr(test, assert_instr(vscalefps))]
6051pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6052    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6053}
6054
6055/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6058#[inline]
6059#[target_feature(enable = "avx512f,avx512vl")]
6060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6061#[cfg_attr(test, assert_instr(vscalefps))]
6062pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6063    unsafe {
6064        transmute(vscalefps128(
6065            a.as_f32x4(),
6066            b.as_f32x4(),
6067            f32x4::ZERO,
6068            0b00001111,
6069        ))
6070    }
6071}
6072
6073/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6074///
6075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6076#[inline]
6077#[target_feature(enable = "avx512f,avx512vl")]
6078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6079#[cfg_attr(test, assert_instr(vscalefps))]
6080pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6081    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6082}
6083
6084/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6087#[inline]
6088#[target_feature(enable = "avx512f,avx512vl")]
6089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6090#[cfg_attr(test, assert_instr(vscalefps))]
6091pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6092    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6093}
6094
6095/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6096///
6097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6098#[inline]
6099#[target_feature(enable = "avx512f")]
6100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6101#[cfg_attr(test, assert_instr(vscalefpd))]
6102pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6103    unsafe {
6104        transmute(vscalefpd(
6105            a.as_f64x8(),
6106            b.as_f64x8(),
6107            f64x8::ZERO,
6108            0b11111111,
6109            _MM_FROUND_CUR_DIRECTION,
6110        ))
6111    }
6112}
6113
6114/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6117#[inline]
6118#[target_feature(enable = "avx512f")]
6119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6120#[cfg_attr(test, assert_instr(vscalefpd))]
6121pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6122    unsafe {
6123        transmute(vscalefpd(
6124            a.as_f64x8(),
6125            b.as_f64x8(),
6126            src.as_f64x8(),
6127            k,
6128            _MM_FROUND_CUR_DIRECTION,
6129        ))
6130    }
6131}
6132
6133/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6136#[inline]
6137#[target_feature(enable = "avx512f")]
6138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6139#[cfg_attr(test, assert_instr(vscalefpd))]
6140pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6141    unsafe {
6142        transmute(vscalefpd(
6143            a.as_f64x8(),
6144            b.as_f64x8(),
6145            f64x8::ZERO,
6146            k,
6147            _MM_FROUND_CUR_DIRECTION,
6148        ))
6149    }
6150}
6151
6152/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6158#[cfg_attr(test, assert_instr(vscalefpd))]
6159pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6160    unsafe {
6161        transmute(vscalefpd256(
6162            a.as_f64x4(),
6163            b.as_f64x4(),
6164            f64x4::ZERO,
6165            0b00001111,
6166        ))
6167    }
6168}
6169
6170/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6171///
6172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6173#[inline]
6174#[target_feature(enable = "avx512f,avx512vl")]
6175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6176#[cfg_attr(test, assert_instr(vscalefpd))]
6177pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6178    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6179}
6180
6181/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6182///
6183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6184#[inline]
6185#[target_feature(enable = "avx512f,avx512vl")]
6186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6187#[cfg_attr(test, assert_instr(vscalefpd))]
6188pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6189    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6190}
6191
6192/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6193///
6194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6195#[inline]
6196#[target_feature(enable = "avx512f,avx512vl")]
6197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6198#[cfg_attr(test, assert_instr(vscalefpd))]
6199pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6200    unsafe {
6201        transmute(vscalefpd128(
6202            a.as_f64x2(),
6203            b.as_f64x2(),
6204            f64x2::ZERO,
6205            0b00000011,
6206        ))
6207    }
6208}
6209
6210/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6211///
6212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6213#[inline]
6214#[target_feature(enable = "avx512f,avx512vl")]
6215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6216#[cfg_attr(test, assert_instr(vscalefpd))]
6217pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6218    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6219}
6220
6221/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6224#[inline]
6225#[target_feature(enable = "avx512f,avx512vl")]
6226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6227#[cfg_attr(test, assert_instr(vscalefpd))]
6228pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6229    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6230}
6231
6232/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6233///
6234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6235#[inline]
6236#[target_feature(enable = "avx512f")]
6237#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6238#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6239#[rustc_legacy_const_generics(3)]
6240pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6241    unsafe {
6242        static_assert_uimm_bits!(IMM8, 8);
6243        let a = a.as_f32x16();
6244        let b = b.as_f32x16();
6245        let c = c.as_i32x16();
6246        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6247        transmute(r)
6248    }
6249}
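// NOTE (added commentary; a hedged summary of how VFIXUPIMM consumes its
// operands, not stated in the doc text above): for each lane, the element of
// `b` is classified (quiet/signaling NaN, zero, one, +/- infinity, other
// negative or positive value), that class selects a 4-bit "token" from the
// corresponding element of the integer table `c`, and the token decides what
// is written back (keep `a`, keep `b`, or substitute a fixed constant such as
// 0.0, 1.0, +/- infinity or a quiet NaN). `IMM8` only chooses which of those
// special cases additionally signal floating-point exceptions, which is why
// `IMM8 = 0` (no extra reporting) is the common choice.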
6250
6251/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6252///
6253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6254#[inline]
6255#[target_feature(enable = "avx512f")]
6256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6257#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6258#[rustc_legacy_const_generics(4)]
6259pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6260    a: __m512,
6261    k: __mmask16,
6262    b: __m512,
6263    c: __m512i,
6264) -> __m512 {
6265    unsafe {
6266        static_assert_uimm_bits!(IMM8, 8);
6267        let a = a.as_f32x16();
6268        let b = b.as_f32x16();
6269        let c = c.as_i32x16();
6270        let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6271        transmute(r)
6272    }
6273}
6274
6275/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6276///
6277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6278#[inline]
6279#[target_feature(enable = "avx512f")]
6280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6281#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6282#[rustc_legacy_const_generics(4)]
6283pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6284    k: __mmask16,
6285    a: __m512,
6286    b: __m512,
6287    c: __m512i,
6288) -> __m512 {
6289    unsafe {
6290        static_assert_uimm_bits!(IMM8, 8);
6291        let a = a.as_f32x16();
6292        let b = b.as_f32x16();
6293        let c = c.as_i32x16();
6294        let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6295        transmute(r)
6296    }
6297}
6298
6299/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6300///
6301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6302#[inline]
6303#[target_feature(enable = "avx512f,avx512vl")]
6304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6305#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6306#[rustc_legacy_const_generics(3)]
6307pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6308    unsafe {
6309        static_assert_uimm_bits!(IMM8, 8);
6310        let a = a.as_f32x8();
6311        let b = b.as_f32x8();
6312        let c = c.as_i32x8();
6313        let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
6314        transmute(r)
6315    }
6316}
6317
6318/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6319///
6320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6321#[inline]
6322#[target_feature(enable = "avx512f,avx512vl")]
6323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6324#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6325#[rustc_legacy_const_generics(4)]
6326pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6327    a: __m256,
6328    k: __mmask8,
6329    b: __m256,
6330    c: __m256i,
6331) -> __m256 {
6332    unsafe {
6333        static_assert_uimm_bits!(IMM8, 8);
6334        let a = a.as_f32x8();
6335        let b = b.as_f32x8();
6336        let c = c.as_i32x8();
6337        let r = vfixupimmps256(a, b, c, IMM8, k);
6338        transmute(r)
6339    }
6340}
6341
6342/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6345#[inline]
6346#[target_feature(enable = "avx512f,avx512vl")]
6347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6348#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6349#[rustc_legacy_const_generics(4)]
6350pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6351    k: __mmask8,
6352    a: __m256,
6353    b: __m256,
6354    c: __m256i,
6355) -> __m256 {
6356    unsafe {
6357        static_assert_uimm_bits!(IMM8, 8);
6358        let a = a.as_f32x8();
6359        let b = b.as_f32x8();
6360        let c = c.as_i32x8();
6361        let r = vfixupimmpsz256(a, b, c, IMM8, k);
6362        transmute(r)
6363    }
6364}
6365
6366/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6367///
6368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6369#[inline]
6370#[target_feature(enable = "avx512f,avx512vl")]
6371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6372#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6373#[rustc_legacy_const_generics(3)]
6374pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6375    unsafe {
6376        static_assert_uimm_bits!(IMM8, 8);
6377        let a = a.as_f32x4();
6378        let b = b.as_f32x4();
6379        let c = c.as_i32x4();
6380        let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
6381        transmute(r)
6382    }
6383}
6384
6385/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6386///
6387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6388#[inline]
6389#[target_feature(enable = "avx512f,avx512vl")]
6390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6391#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6392#[rustc_legacy_const_generics(4)]
6393pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6394    a: __m128,
6395    k: __mmask8,
6396    b: __m128,
6397    c: __m128i,
6398) -> __m128 {
6399    unsafe {
6400        static_assert_uimm_bits!(IMM8, 8);
6401        let a = a.as_f32x4();
6402        let b = b.as_f32x4();
6403        let c = c.as_i32x4();
6404        let r = vfixupimmps128(a, b, c, IMM8, k);
6405        transmute(r)
6406    }
6407}
6408
6409/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6410///
6411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6412#[inline]
6413#[target_feature(enable = "avx512f,avx512vl")]
6414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6415#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6416#[rustc_legacy_const_generics(4)]
6417pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6418    k: __mmask8,
6419    a: __m128,
6420    b: __m128,
6421    c: __m128i,
6422) -> __m128 {
6423    unsafe {
6424        static_assert_uimm_bits!(IMM8, 8);
6425        let a = a.as_f32x4();
6426        let b = b.as_f32x4();
6427        let c = c.as_i32x4();
6428        let r = vfixupimmpsz128(a, b, c, IMM8, k);
6429        transmute(r)
6430    }
6431}
6432
6433/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6436#[inline]
6437#[target_feature(enable = "avx512f")]
6438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6439#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6440#[rustc_legacy_const_generics(3)]
6441pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6442    unsafe {
6443        static_assert_uimm_bits!(IMM8, 8);
6444        let a = a.as_f64x8();
6445        let b = b.as_f64x8();
6446        let c = c.as_i64x8();
6447        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6448        transmute(r)
6449    }
6450}
6451
6452/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6453///
6454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6455#[inline]
6456#[target_feature(enable = "avx512f")]
6457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6458#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6459#[rustc_legacy_const_generics(4)]
6460pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6461    a: __m512d,
6462    k: __mmask8,
6463    b: __m512d,
6464    c: __m512i,
6465) -> __m512d {
6466    unsafe {
6467        static_assert_uimm_bits!(IMM8, 8);
6468        let a = a.as_f64x8();
6469        let b = b.as_f64x8();
6470        let c = c.as_i64x8();
6471        let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6472        transmute(r)
6473    }
6474}
6475
6476/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6477///
6478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6479#[inline]
6480#[target_feature(enable = "avx512f")]
6481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6482#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6483#[rustc_legacy_const_generics(4)]
6484pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6485    k: __mmask8,
6486    a: __m512d,
6487    b: __m512d,
6488    c: __m512i,
6489) -> __m512d {
6490    unsafe {
6491        static_assert_uimm_bits!(IMM8, 8);
6492        let a = a.as_f64x8();
6493        let b = b.as_f64x8();
6494        let c = c.as_i64x8();
6495        let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6496        transmute(r)
6497    }
6498}
6499
6500/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6501///
6502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6503#[inline]
6504#[target_feature(enable = "avx512f,avx512vl")]
6505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6506#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6507#[rustc_legacy_const_generics(3)]
6508pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6509    unsafe {
6510        static_assert_uimm_bits!(IMM8, 8);
6511        let a = a.as_f64x4();
6512        let b = b.as_f64x4();
6513        let c = c.as_i64x4();
6514        let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6515        transmute(r)
6516    }
6517}
6518
6519/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6522#[inline]
6523#[target_feature(enable = "avx512f,avx512vl")]
6524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6525#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6526#[rustc_legacy_const_generics(4)]
6527pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6528    a: __m256d,
6529    k: __mmask8,
6530    b: __m256d,
6531    c: __m256i,
6532) -> __m256d {
6533    unsafe {
6534        static_assert_uimm_bits!(IMM8, 8);
6535        let a = a.as_f64x4();
6536        let b = b.as_f64x4();
6537        let c = c.as_i64x4();
6538        let r = vfixupimmpd256(a, b, c, IMM8, k);
6539        transmute(r)
6540    }
6541}
6542
6543/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6546#[inline]
6547#[target_feature(enable = "avx512f,avx512vl")]
6548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6549#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6550#[rustc_legacy_const_generics(4)]
6551pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6552    k: __mmask8,
6553    a: __m256d,
6554    b: __m256d,
6555    c: __m256i,
6556) -> __m256d {
6557    unsafe {
6558        static_assert_uimm_bits!(IMM8, 8);
6559        let a = a.as_f64x4();
6560        let b = b.as_f64x4();
6561        let c = c.as_i64x4();
6562        let r = vfixupimmpdz256(a, b, c, IMM8, k);
6563        transmute(r)
6564    }
6565}
6566
6567/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6568///
6569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6570#[inline]
6571#[target_feature(enable = "avx512f,avx512vl")]
6572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6573#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6574#[rustc_legacy_const_generics(3)]
6575pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6576    unsafe {
6577        static_assert_uimm_bits!(IMM8, 8);
6578        let a = a.as_f64x2();
6579        let b = b.as_f64x2();
6580        let c = c.as_i64x2();
6581        let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6582        transmute(r)
6583    }
6584}
6585
6586/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6589#[inline]
6590#[target_feature(enable = "avx512f,avx512vl")]
6591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6592#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6593#[rustc_legacy_const_generics(4)]
6594pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6595    a: __m128d,
6596    k: __mmask8,
6597    b: __m128d,
6598    c: __m128i,
6599) -> __m128d {
6600    unsafe {
6601        static_assert_uimm_bits!(IMM8, 8);
6602        let a = a.as_f64x2();
6603        let b = b.as_f64x2();
6604        let c = c.as_i64x2();
6605        let r = vfixupimmpd128(a, b, c, IMM8, k);
6606        transmute(r)
6607    }
6608}
6609
6610/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6613#[inline]
6614#[target_feature(enable = "avx512f,avx512vl")]
6615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6616#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6617#[rustc_legacy_const_generics(4)]
6618pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6619    k: __mmask8,
6620    a: __m128d,
6621    b: __m128d,
6622    c: __m128i,
6623) -> __m128d {
6624    unsafe {
6625        static_assert_uimm_bits!(IMM8, 8);
6626        let a = a.as_f64x2();
6627        let b = b.as_f64x2();
6628        let c = c.as_i64x2();
6629        let r = vfixupimmpdz128(a, b, c, IMM8, k);
6630        transmute(r)
6631    }
6632}
6633
6634/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6635///
6636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643    unsafe {
6644        static_assert_uimm_bits!(IMM8, 8);
6645        let a = a.as_i32x16();
6646        let b = b.as_i32x16();
6647        let c = c.as_i32x16();
6648        let r = vpternlogd(a, b, c, IMM8);
6649        transmute(r)
6650    }
6651}
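// NOTE (added commentary): `IMM8` is simply an 8-entry truth table. For every
// bit position, the bits taken from `a`, `b` and `c` form the index
// `(a << 2) | (b << 1) | c`, and that bit of `IMM8` becomes the result bit.
// Two common constants, shown as illustrative (untested here) calls:
//
//     // a ^ b ^ c  (three-way XOR): truth table 0b1001_0110
//     // let x = _mm512_ternarylogic_epi32::<0x96>(a, b, c);
//
//     // (a & b) | c: truth table 0b1110_1010
//     // let y = _mm512_ternarylogic_epi32::<0xEA>(a, b, c);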
6652
6653/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662    src: __m512i,
6663    k: __mmask16,
6664    a: __m512i,
6665    b: __m512i,
6666) -> __m512i {
6667    unsafe {
6668        static_assert_uimm_bits!(IMM8, 8);
6669        let src = src.as_i32x16();
6670        let a = a.as_i32x16();
6671        let b = b.as_i32x16();
6672        let r = vpternlogd(src, a, b, IMM8);
6673        transmute(simd_select_bitmask(k, r, src))
6674    }
6675}
6676
6677/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686    k: __mmask16,
6687    a: __m512i,
6688    b: __m512i,
6689    c: __m512i,
6690) -> __m512i {
6691    unsafe {
6692        static_assert_uimm_bits!(IMM8, 8);
6693        let a = a.as_i32x16();
6694        let b = b.as_i32x16();
6695        let c = c.as_i32x16();
6696        let r = vpternlogd(a, b, c, IMM8);
6697        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698    }
6699}
6700
6701/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710    unsafe {
6711        static_assert_uimm_bits!(IMM8, 8);
6712        let a = a.as_i32x8();
6713        let b = b.as_i32x8();
6714        let c = c.as_i32x8();
6715        let r = vpternlogd256(a, b, c, IMM8);
6716        transmute(r)
6717    }
6718}
6719
6720/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729    src: __m256i,
6730    k: __mmask8,
6731    a: __m256i,
6732    b: __m256i,
6733) -> __m256i {
6734    unsafe {
6735        static_assert_uimm_bits!(IMM8, 8);
6736        let src = src.as_i32x8();
6737        let a = a.as_i32x8();
6738        let b = b.as_i32x8();
6739        let r = vpternlogd256(src, a, b, IMM8);
6740        transmute(simd_select_bitmask(k, r, src))
6741    }
6742}
6743
6744/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753    k: __mmask8,
6754    a: __m256i,
6755    b: __m256i,
6756    c: __m256i,
6757) -> __m256i {
6758    unsafe {
6759        static_assert_uimm_bits!(IMM8, 8);
6760        let a = a.as_i32x8();
6761        let b = b.as_i32x8();
6762        let c = c.as_i32x8();
6763        let r = vpternlogd256(a, b, c, IMM8);
6764        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765    }
6766}
6767
6768/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777    unsafe {
6778        static_assert_uimm_bits!(IMM8, 8);
6779        let a = a.as_i32x4();
6780        let b = b.as_i32x4();
6781        let c = c.as_i32x4();
6782        let r = vpternlogd128(a, b, c, IMM8);
6783        transmute(r)
6784    }
6785}
6786
6787/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796    src: __m128i,
6797    k: __mmask8,
6798    a: __m128i,
6799    b: __m128i,
6800) -> __m128i {
6801    unsafe {
6802        static_assert_uimm_bits!(IMM8, 8);
6803        let src = src.as_i32x4();
6804        let a = a.as_i32x4();
6805        let b = b.as_i32x4();
6806        let r = vpternlogd128(src, a, b, IMM8);
6807        transmute(simd_select_bitmask(k, r, src))
6808    }
6809}
6810
6811/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820    k: __mmask8,
6821    a: __m128i,
6822    b: __m128i,
6823    c: __m128i,
6824) -> __m128i {
6825    unsafe {
6826        static_assert_uimm_bits!(IMM8, 8);
6827        let a = a.as_i32x4();
6828        let b = b.as_i32x4();
6829        let c = c.as_i32x4();
6830        let r = vpternlogd128(a, b, c, IMM8);
6831        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832    }
6833}
6834
6835/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
6838#[inline]
6839#[target_feature(enable = "avx512f")]
6840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6841#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6842#[rustc_legacy_const_generics(3)]
6843pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6844    unsafe {
6845        static_assert_uimm_bits!(IMM8, 8);
6846        let a = a.as_i64x8();
6847        let b = b.as_i64x8();
6848        let c = c.as_i64x8();
6849        let r = vpternlogq(a, b, c, IMM8);
6850        transmute(r)
6851    }
6852}
6853
6854/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6855///
6856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i64x8();
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let r = vpternlogq(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let c = c.as_i64x8();
        let r = vpternlogq(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let c = c.as_i64x4();
        let r = vpternlogq256(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i64x4();
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let r = vpternlogq256(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let c = c.as_i64x4();
        let r = vpternlogq256(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let c = c.as_i64x2();
        let r = vpternlogq128(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i64x2();
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let r = vpternlogq128(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
    c: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let c = c.as_i64x2();
        let r = vpternlogq128(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
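///
/// # Examples
///
/// A minimal illustrative sketch (assuming `avx512f` is detected at runtime), using
/// the interval and sign-control constants listed above: `12.0 = 1.5 * 2^3`, so
/// normalizing to `[1, 2)` yields `1.5`, while normalizing to `[0.5, 1)` yields
/// `0.75` (`12.0 = 0.75 * 2^4`).
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_ps(12.0);
///             let m1 = _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a);
///             let m2 = _mm512_getmant_ps::<_MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src>(a);
///             let l1: [f32; 16] = core::mem::transmute(m1);
///             let l2: [f32; 16] = core::mem::transmute(m2);
///             assert_eq!(l1, [1.5; 16]);
///             assert_eq!(l2, [0.75; 16]);
///         }
///     }
/// }
/// ```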
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x16();
        let zero = f32x16::ZERO;
        let r = vgetmantps(
            a,
            SIGN << 2 | NORM,
            zero,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x16();
        let r = vgetmantps(
            a,
            SIGN << 2 | NORM,
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x8();
        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm256_mask_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m256,
    k: __mmask8,
    a: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x8();
        let src = src.as_f32x8();
        let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm256_maskz_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x8();
        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_mask_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128,
    k: __mmask8,
    a: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let src = src.as_f32x4();
        let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_maskz_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
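///
/// # Examples
///
/// A minimal illustrative sketch of the sign control (assuming `avx512f` is detected
/// at runtime), using the constants listed above: `-12.0 = -1.5 * 2^3`, so
/// `_MM_MANT_SIGN_src` keeps the source sign while `_MM_MANT_SIGN_zero` clears it.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_pd(-12.0);
///             let keep = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a);
///             let clear = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero>(a);
///             let k: [f64; 8] = core::mem::transmute(keep);
///             let c: [f64; 8] = core::mem::transmute(clear);
///             assert_eq!(k, [-1.5; 8]);
///             assert_eq!(c, [1.5; 8]);
///         }
///     }
/// }
/// ```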
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x8();
        let zero = f64x8::ZERO;
        let r = vgetmantpd(
            a,
            SIGN << 2 | NORM,
            zero,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x8();
        let r = vgetmantpd(
            a,
            SIGN << 2 | NORM,
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x4();
        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm256_mask_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x4();
        let src = src.as_f64x4();
        let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm256_maskz_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x4();
        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_mask_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let src = src.as_f64x2();
        let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_maskz_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
        transmute(r)
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
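///
/// # Examples
///
/// A minimal illustrative sketch of how the rounding mode changes the result
/// (assuming `avx512f` is detected at runtime): the exact sum `1.0 + 1.0e-10` is not
/// representable in `f32`, so round-to-nearest returns `1.0` while rounding towards
/// positive infinity returns the next representable value, `1.0 + f32::EPSILON`.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_ps(1.0);
///             let b = _mm512_set1_ps(1.0e-10);
///             let nearest =
///                 _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///             let up =
///                 _mm512_add_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
///             let n: [f32; 16] = core::mem::transmute(nearest);
///             let u: [f32; 16] = core::mem::transmute(up);
///             assert_eq!(n, [1.0; 16]);
///             assert_eq!(u, [1.0 + f32::EPSILON; 16]);
///         }
///     }
/// }
/// ```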
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vaddps(a, b, ROUNDING);
        transmute(r)
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vaddps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vaddps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vaddpd(a, b, ROUNDING);
        transmute(r)
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vaddpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vaddpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
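///
/// # Examples
///
/// A minimal illustrative sketch (assuming `avx512f` is detected at runtime): the
/// exact difference `1.0 - 1.0e-10` is not representable in `f32`, so round-to-nearest
/// returns `1.0` while truncation returns the next value towards zero,
/// `1.0 - f32::EPSILON / 2.0`.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_ps(1.0);
///             let b = _mm512_set1_ps(1.0e-10);
///             let nearest =
///                 _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///             let trunc =
///                 _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
///             let n: [f32; 16] = core::mem::transmute(nearest);
///             let t: [f32; 16] = core::mem::transmute(trunc);
///             assert_eq!(n, [1.0; 16]);
///             assert_eq!(t, [1.0 - f32::EPSILON / 2.0; 16]);
///         }
///     }
/// }
/// ```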
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vsubps(a, b, ROUNDING);
        transmute(r)
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vsubps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vsubps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vsubpd(a, b, ROUNDING);
        transmute(r)
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7922///
7923/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7924/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7925/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7926/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7927/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7928/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7929///
7930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7931#[inline]
7932#[target_feature(enable = "avx512f")]
7933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7934#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7935#[rustc_legacy_const_generics(4)]
7936pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7937    src: __m512d,
7938    k: __mmask8,
7939    a: __m512d,
7940    b: __m512d,
7941) -> __m512d {
7942    unsafe {
7943        static_assert_rounding!(ROUNDING);
7944        let a = a.as_f64x8();
7945        let b = b.as_f64x8();
7946        let r = vsubpd(a, b, ROUNDING);
7947        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7948    }
7949}
7950
7951/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7952///
7953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7959///
7960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
7961#[inline]
7962#[target_feature(enable = "avx512f")]
7963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7964#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7965#[rustc_legacy_const_generics(3)]
7966pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7967    k: __mmask8,
7968    a: __m512d,
7969    b: __m512d,
7970) -> __m512d {
7971    unsafe {
7972        static_assert_rounding!(ROUNDING);
7973        let a = a.as_f64x8();
7974        let b = b.as_f64x8();
7975        let r = vsubpd(a, b, ROUNDING);
7976        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7977    }
7978}
7979
7980/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
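///
/// # Example
///
/// Illustrative sketch only (not from the original docs); assumes `avx512f`
/// has been detected at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(2.0);
///     let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     // Every lane of `r` is 3.0.
/// }
/// ```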
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(2)]
7995pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7996    unsafe {
7997        static_assert_rounding!(ROUNDING);
7998        let a = a.as_f32x16();
7999        let b = b.as_f32x16();
8000        let r = vmulps(a, b, ROUNDING);
8001        transmute(r)
8002    }
8003}
8004
8005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8006///
8007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8013///
8014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
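///
/// # Example
///
/// A sketch of how the writemask behaves (not part of the upstream docs);
/// assumes `avx512f` is available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm512_set1_ps(-1.0);
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(4.0);
///     // Only the low 8 lanes are computed; the upper 8 lanes keep `src`.
///     let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_CUR_DIRECTION }>(src, 0b0000_0000_1111_1111, a, b);
///     // Lanes 0..8 of `r` are 8.0, lanes 8..16 are -1.0.
/// }
/// ```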
8015#[inline]
8016#[target_feature(enable = "avx512f")]
8017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8018#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8019#[rustc_legacy_const_generics(4)]
8020pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8021    src: __m512,
8022    k: __mmask16,
8023    a: __m512,
8024    b: __m512,
8025) -> __m512 {
8026    unsafe {
8027        static_assert_rounding!(ROUNDING);
8028        let a = a.as_f32x16();
8029        let b = b.as_f32x16();
8030        let r = vmulps(a, b, ROUNDING);
8031        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8032    }
8033}
8034
8035/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8036///
8037/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8038/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8039/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8040/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8041/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8042/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8043///
8044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8045#[inline]
8046#[target_feature(enable = "avx512f")]
8047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8048#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8049#[rustc_legacy_const_generics(3)]
8050pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8051    k: __mmask16,
8052    a: __m512,
8053    b: __m512,
8054) -> __m512 {
8055    unsafe {
8056        static_assert_rounding!(ROUNDING);
8057        let a = a.as_f32x16();
8058        let b = b.as_f32x16();
8059        let r = vmulps(a, b, ROUNDING);
8060        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8061    }
8062}
8063
8064/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8077#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(2)]
8079pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8080    unsafe {
8081        static_assert_rounding!(ROUNDING);
8082        let a = a.as_f64x8();
8083        let b = b.as_f64x8();
8084        let r = vmulpd(a, b, ROUNDING);
8085        transmute(r)
8086    }
8087}
8088
8089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8090///
8091/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8092/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8093/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8094/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8095/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8096/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8097///
8098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8099#[inline]
8100#[target_feature(enable = "avx512f")]
8101#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8102#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8103#[rustc_legacy_const_generics(4)]
8104pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8105    src: __m512d,
8106    k: __mmask8,
8107    a: __m512d,
8108    b: __m512d,
8109) -> __m512d {
8110    unsafe {
8111        static_assert_rounding!(ROUNDING);
8112        let a = a.as_f64x8();
8113        let b = b.as_f64x8();
8114        let r = vmulpd(a, b, ROUNDING);
8115        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8116    }
8117}
8118
8119/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8120///
8121/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8122/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8123/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8124/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8125/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8126/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3936)
8129#[inline]
8130#[target_feature(enable = "avx512f")]
8131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8132#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8133#[rustc_legacy_const_generics(3)]
8134pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8135    k: __mmask8,
8136    a: __m512d,
8137    b: __m512d,
8138) -> __m512d {
8139    unsafe {
8140        static_assert_rounding!(ROUNDING);
8141        let a = a.as_f64x8();
8142        let b = b.as_f64x8();
8143        let r = vmulpd(a, b, ROUNDING);
8144        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8145    }
8146}
8147
8148/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
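///
/// # Example
///
/// A small sketch showing that the rounding parameter is observable (not from
/// the original docs); assumes `avx512f` is available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(3.0);
///     // 1.0 / 3.0 is inexact, so the chosen rounding direction matters.
///     let up = _mm512_div_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
///     let down = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
///     // Each lane of `up` is one ULP above the corresponding lane of `down`.
/// }
/// ```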
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8161#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(2)]
8163pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8164    unsafe {
8165        static_assert_rounding!(ROUNDING);
8166        let a = a.as_f32x16();
8167        let b = b.as_f32x16();
8168        let r = vdivps(a, b, ROUNDING);
8169        transmute(r)
8170    }
8171}
8172
8173/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8174///
8175/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8176/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8177/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8178/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8179/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8181///
8182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8183#[inline]
8184#[target_feature(enable = "avx512f")]
8185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8186#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8187#[rustc_legacy_const_generics(4)]
8188pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8189    src: __m512,
8190    k: __mmask16,
8191    a: __m512,
8192    b: __m512,
8193) -> __m512 {
8194    unsafe {
8195        static_assert_rounding!(ROUNDING);
8196        let a = a.as_f32x16();
8197        let b = b.as_f32x16();
8198        let r = vdivps(a, b, ROUNDING);
8199        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8200    }
8201}
8202
8203/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8204///
8205/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8206/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211///
8212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
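///
/// # Example
///
/// Zeromask behaviour, sketched under the usual assumption that `avx512f` has
/// been detected (illustrative only):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(6.0);
///     let b = _mm512_set1_ps(2.0);
///     // Even-numbered lanes are computed; odd-numbered lanes are zeroed.
///     let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_CUR_DIRECTION }>(0b0101_0101_0101_0101, a, b);
///     // Even lanes of `r` are 3.0, odd lanes are 0.0.
/// }
/// ```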
8213#[inline]
8214#[target_feature(enable = "avx512f")]
8215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8216#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8217#[rustc_legacy_const_generics(3)]
8218pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8219    k: __mmask16,
8220    a: __m512,
8221    b: __m512,
8222) -> __m512 {
8223    unsafe {
8224        static_assert_rounding!(ROUNDING);
8225        let a = a.as_f32x16();
8226        let b = b.as_f32x16();
8227        let r = vdivps(a, b, ROUNDING);
8228        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8229    }
8230}
8231
8232/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8245#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(2)]
8247pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8248    unsafe {
8249        static_assert_rounding!(ROUNDING);
8250        let a = a.as_f64x8();
8251        let b = b.as_f64x8();
8252        let r = vdivpd(a, b, ROUNDING);
8253        transmute(r)
8254    }
8255}
8256
8257/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8258///
8259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8265///
8266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8267#[inline]
8268#[target_feature(enable = "avx512f")]
8269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8270#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8271#[rustc_legacy_const_generics(4)]
8272pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8273    src: __m512d,
8274    k: __mmask8,
8275    a: __m512d,
8276    b: __m512d,
8277) -> __m512d {
8278    unsafe {
8279        static_assert_rounding!(ROUNDING);
8280        let a = a.as_f64x8();
8281        let b = b.as_f64x8();
8282        let r = vdivpd(a, b, ROUNDING);
8283        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8284    }
8285}
8286
8287/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8288///
8289/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8290/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8291/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8292/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8293/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8294/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8295///
8296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8297#[inline]
8298#[target_feature(enable = "avx512f")]
8299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8300#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8301#[rustc_legacy_const_generics(3)]
8302pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8303    k: __mmask8,
8304    a: __m512d,
8305    b: __m512d,
8306) -> __m512d {
8307    unsafe {
8308        static_assert_rounding!(ROUNDING);
8309        let a = a.as_f64x8();
8310        let b = b.as_f64x8();
8311        let r = vdivpd(a, b, ROUNDING);
8312        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8313    }
8314}
8315
8316/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
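///
/// # Example
///
/// A minimal sketch (not part of the upstream docs); assumes `avx512f` is
/// available at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///     // Every lane of `r` is sqrt(2.0) rounded to the nearest `f32`.
/// }
/// ```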
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8329#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(1)]
8331pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8332    unsafe {
8333        static_assert_rounding!(ROUNDING);
8334        let a = a.as_f32x16();
8335        let r = vsqrtps(a, ROUNDING);
8336        transmute(r)
8337    }
8338}
8339
8340/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8341///
8342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8348///
8349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8350#[inline]
8351#[target_feature(enable = "avx512f")]
8352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8353#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8354#[rustc_legacy_const_generics(3)]
8355pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8356    src: __m512,
8357    k: __mmask16,
8358    a: __m512,
8359) -> __m512 {
8360    unsafe {
8361        static_assert_rounding!(ROUNDING);
8362        let a = a.as_f32x16();
8363        let r = vsqrtps(a, ROUNDING);
8364        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8365    }
8366}
8367
8368/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8369///
8370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8376///
8377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8378#[inline]
8379#[target_feature(enable = "avx512f")]
8380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8381#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8382#[rustc_legacy_const_generics(2)]
8383pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8384    unsafe {
8385        static_assert_rounding!(ROUNDING);
8386        let a = a.as_f32x16();
8387        let r = vsqrtps(a, ROUNDING);
8388        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8389    }
8390}
8391
8392/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8393///
8394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
8402#[inline]
8403#[target_feature(enable = "avx512f")]
8404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8405#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8406#[rustc_legacy_const_generics(1)]
8407pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8408    unsafe {
8409        static_assert_rounding!(ROUNDING);
8410        let a = a.as_f64x8();
8411        let r = vsqrtpd(a, ROUNDING);
8412        transmute(r)
8413    }
8414}
8415
8416/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8417///
8418/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8419/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8420/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8421/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8422/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8424///
8425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8426#[inline]
8427#[target_feature(enable = "avx512f")]
8428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8429#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8430#[rustc_legacy_const_generics(3)]
8431pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8432    src: __m512d,
8433    k: __mmask8,
8434    a: __m512d,
8435) -> __m512d {
8436    unsafe {
8437        static_assert_rounding!(ROUNDING);
8438        let a = a.as_f64x8();
8439        let r = vsqrtpd(a, ROUNDING);
8440        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8441    }
8442}
8443
8444/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8445///
8446/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8447/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8448/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8449/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8450/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8452///
8453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8454#[inline]
8455#[target_feature(enable = "avx512f")]
8456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8457#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8458#[rustc_legacy_const_generics(2)]
8459pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8460    unsafe {
8461        static_assert_rounding!(ROUNDING);
8462        let a = a.as_f64x8();
8463        let r = vsqrtpd(a, ROUNDING);
8464        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8465    }
8466}
8467
8468/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8469///
8470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8476///
8477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
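///
/// # Example
///
/// Illustrative sketch (not from the original documentation); assumes the
/// `avx512f` feature has been checked at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Fused multiply-add: (a * b) + c with a single rounding at the end.
///     let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     // Every lane of `r` is 7.0.
/// }
/// ```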
8478#[inline]
8479#[target_feature(enable = "avx512f")]
8480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8481#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8482#[rustc_legacy_const_generics(3)]
8483pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8484    unsafe {
8485        static_assert_rounding!(ROUNDING);
8486        vfmadd132psround(a, b, c, ROUNDING)
8487    }
8488}
8489
8490/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8491///
8492/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8498///
8499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8500#[inline]
8501#[target_feature(enable = "avx512f")]
8502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8503#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8504#[rustc_legacy_const_generics(4)]
8505pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8506    a: __m512,
8507    k: __mmask16,
8508    b: __m512,
8509    c: __m512,
8510) -> __m512 {
8511    unsafe {
8512        static_assert_rounding!(ROUNDING);
8513        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8514    }
8515}
8516
8517/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8518///
8519/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8520/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8521/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8522/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8523/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8524/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8525///
8526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8527#[inline]
8528#[target_feature(enable = "avx512f")]
8529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8530#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8531#[rustc_legacy_const_generics(4)]
8532pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8533    k: __mmask16,
8534    a: __m512,
8535    b: __m512,
8536    c: __m512,
8537) -> __m512 {
8538    unsafe {
8539        static_assert_rounding!(ROUNDING);
8540        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
8541    }
8542}
8543
8544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8545///
8546/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8547/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8548/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8549/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8550/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8551/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8552///
8553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
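///
/// # Example
///
/// A sketch of the `mask3` (copy-from-`c`) behaviour, illustrative only and
/// assuming `avx512f` is available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     // The low 4 lanes compute (a * b) + c; the remaining lanes keep `c`.
///     let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_CUR_DIRECTION }>(a, b, c, 0b0000_0000_0000_1111);
///     // Lanes 0..4 of `r` are 16.0, lanes 4..16 are 10.0.
/// }
/// ```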
8554#[inline]
8555#[target_feature(enable = "avx512f")]
8556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8557#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8558#[rustc_legacy_const_generics(4)]
8559pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8560    a: __m512,
8561    b: __m512,
8562    c: __m512,
8563    k: __mmask16,
8564) -> __m512 {
8565    unsafe {
8566        static_assert_rounding!(ROUNDING);
8567        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8568    }
8569}
8570
8571/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8572///
8573/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8574/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8575/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8576/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8577/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8578/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8579///
8580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
8581#[inline]
8582#[target_feature(enable = "avx512f")]
8583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8584#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8585#[rustc_legacy_const_generics(3)]
8586pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8587    unsafe {
8588        static_assert_rounding!(ROUNDING);
8589        vfmadd132pdround(a, b, c, ROUNDING)
8590    }
8591}
8592
8593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8594///
8595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8601///
8602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8603#[inline]
8604#[target_feature(enable = "avx512f")]
8605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8606#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8607#[rustc_legacy_const_generics(4)]
8608pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8609    a: __m512d,
8610    k: __mmask8,
8611    b: __m512d,
8612    c: __m512d,
8613) -> __m512d {
8614    unsafe {
8615        static_assert_rounding!(ROUNDING);
8616        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8617    }
8618}
8619
8620/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8621///
8622/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8628///
8629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8630#[inline]
8631#[target_feature(enable = "avx512f")]
8632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8633#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8634#[rustc_legacy_const_generics(4)]
8635pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8636    k: __mmask8,
8637    a: __m512d,
8638    b: __m512d,
8639    c: __m512d,
8640) -> __m512d {
8641    unsafe {
8642        static_assert_rounding!(ROUNDING);
8643        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8644    }
8645}
8646
8647/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8648///
8649/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8655///
8656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8657#[inline]
8658#[target_feature(enable = "avx512f")]
8659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8660#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8661#[rustc_legacy_const_generics(4)]
8662pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8663    a: __m512d,
8664    b: __m512d,
8665    c: __m512d,
8666    k: __mmask8,
8667) -> __m512d {
8668    unsafe {
8669        static_assert_rounding!(ROUNDING);
8670        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8671    }
8672}
8673
8674/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8675///
8676/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8677/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8678/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8679/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8680/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8681/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8682///
8683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
8684#[inline]
8685#[target_feature(enable = "avx512f")]
8686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8687#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8688#[rustc_legacy_const_generics(3)]
8689pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8690    unsafe {
8691        static_assert_rounding!(ROUNDING);
8692        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
8693    }
8694}
8695
8696/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8697///
8698/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8699/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8700/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8701/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8702/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8703/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8704///
8705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8706#[inline]
8707#[target_feature(enable = "avx512f")]
8708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8709#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8710#[rustc_legacy_const_generics(4)]
8711pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8712    a: __m512,
8713    k: __mmask16,
8714    b: __m512,
8715    c: __m512,
8716) -> __m512 {
8717    unsafe {
8718        static_assert_rounding!(ROUNDING);
8719        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8720        simd_select_bitmask(k, r, a)
8721    }
8722}
8723
8724/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8725///
8726/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8727/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8728/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8729/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8730/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8731/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8732///
8733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8734#[inline]
8735#[target_feature(enable = "avx512f")]
8736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8737#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8738#[rustc_legacy_const_generics(4)]
8739pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8740    k: __mmask16,
8741    a: __m512,
8742    b: __m512,
8743    c: __m512,
8744) -> __m512 {
8745    unsafe {
8746        static_assert_rounding!(ROUNDING);
8747        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8748        simd_select_bitmask(k, r, _mm512_setzero_ps())
8749    }
8750}
8751
8752/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8753///
8754/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8755/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8756/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8757/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8758/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8759/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8760///
8761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8762#[inline]
8763#[target_feature(enable = "avx512f")]
8764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8765#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8766#[rustc_legacy_const_generics(4)]
8767pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8768    a: __m512,
8769    b: __m512,
8770    c: __m512,
8771    k: __mmask16,
8772) -> __m512 {
8773    unsafe {
8774        static_assert_rounding!(ROUNDING);
8775        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8776        simd_select_bitmask(k, r, c)
8777    }
8778}
8779
8780/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8781///
8782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8788///
8789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
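///
/// # Example
///
/// Illustrative sketch (not part of the upstream docs); assumes `avx512f` has
/// been detected at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Fused multiply-subtract: (a * b) - c with a single rounding at the end.
///     let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     // Every lane of `r` is 5.0.
/// }
/// ```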
8790#[inline]
8791#[target_feature(enable = "avx512f")]
8792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8793#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8794#[rustc_legacy_const_generics(3)]
8795pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8796    unsafe {
8797        static_assert_rounding!(ROUNDING);
8798        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
8799    }
8800}
8801
8802/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8803///
8804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8810///
8811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8812#[inline]
8813#[target_feature(enable = "avx512f")]
8814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8815#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8816#[rustc_legacy_const_generics(4)]
8817pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8818    a: __m512d,
8819    k: __mmask8,
8820    b: __m512d,
8821    c: __m512d,
8822) -> __m512d {
8823    unsafe {
8824        static_assert_rounding!(ROUNDING);
8825        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8826        simd_select_bitmask(k, r, a)
8827    }
8828}
8829
8830/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8831///
8832/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8840#[inline]
8841#[target_feature(enable = "avx512f")]
8842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8843#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8844#[rustc_legacy_const_generics(4)]
8845pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8846    k: __mmask8,
8847    a: __m512d,
8848    b: __m512d,
8849    c: __m512d,
8850) -> __m512d {
8851    unsafe {
8852        static_assert_rounding!(ROUNDING);
8853        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8854        simd_select_bitmask(k, r, _mm512_setzero_pd())
8855    }
8856}
8857
8858/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8859///
8860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8866///
8867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8868#[inline]
8869#[target_feature(enable = "avx512f")]
8870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8871#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8872#[rustc_legacy_const_generics(4)]
8873pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8874    a: __m512d,
8875    b: __m512d,
8876    c: __m512d,
8877    k: __mmask8,
8878) -> __m512d {
8879    unsafe {
8880        static_assert_rounding!(ROUNDING);
8881        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8882        simd_select_bitmask(k, r, c)
8883    }
8884}
8885
8886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8887///
8888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8894///
8895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
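///
/// # Example
///
/// A sketch of the alternating add/subtract pattern (illustrative only, and
/// assuming `avx512f` is available); even-indexed lanes subtract `c`,
/// odd-indexed lanes add it:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_CUR_DIRECTION }>(a, b, c);
///     // Even lanes of `r` are 5.0, odd lanes are 7.0.
/// }
/// ```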
8896#[inline]
8897#[target_feature(enable = "avx512f")]
8898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8899#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8900#[rustc_legacy_const_generics(3)]
8901pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8902    unsafe {
8903        static_assert_rounding!(ROUNDING);
8904        vfmaddsubpsround(a, b, c, ROUNDING)
8905    }
8906}
8907
8908/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8909///
8910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8918#[inline]
8919#[target_feature(enable = "avx512f")]
8920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8921#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8922#[rustc_legacy_const_generics(4)]
8923pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8924    a: __m512,
8925    k: __mmask16,
8926    b: __m512,
8927    c: __m512,
8928) -> __m512 {
8929    unsafe {
8930        static_assert_rounding!(ROUNDING);
8931        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8932    }
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8948#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8951    k: __mmask16,
8952    a: __m512,
8953    b: __m512,
8954    c: __m512,
8955) -> __m512 {
8956    unsafe {
8957        static_assert_rounding!(ROUNDING);
8958        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8959    }
8960}
8961
8962/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8963///
8964/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8965/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8966/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8967/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8968/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8969/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8970///
8971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8972#[inline]
8973#[target_feature(enable = "avx512f")]
8974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8975#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8976#[rustc_legacy_const_generics(4)]
8977pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978    a: __m512,
8979    b: __m512,
8980    c: __m512,
8981    k: __mmask16,
8982) -> __m512 {
8983    unsafe {
8984        static_assert_rounding!(ROUNDING);
8985        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986    }
8987}
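
// Illustrative sketch, not part of the original source: the three masked forms
// differ only in what an unselected lane holds afterwards. With mask bit i clear,
// the `mask` form keeps `a[i]`, `maskz` writes zero, and `mask3` keeps `c[i]`;
// selected lanes produce the same alternating add/subtract result in all three.
//
//     #[target_feature(enable = "avx512f")]
//     fn fmaddsub_mask_forms(
//         a: __m512,
//         b: __m512,
//         c: __m512,
//         k: __mmask16,
//     ) -> (__m512, __m512, __m512) {
//         const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
//         (
//             _mm512_mask_fmaddsub_round_ps::<{ R }>(a, k, b, c),
//             _mm512_maskz_fmaddsub_round_ps::<{ R }>(k, a, b, c),
//             _mm512_mask3_fmaddsub_round_ps::<{ R }>(a, b, c, k),
//         )
//     }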
8988
8989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8990///
8991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8997///
8998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
8999#[inline]
9000#[target_feature(enable = "avx512f")]
9001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9002#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9003#[rustc_legacy_const_generics(3)]
9004pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9005    a: __m512d,
9006    b: __m512d,
9007    c: __m512d,
9008) -> __m512d {
9009    unsafe {
9010        static_assert_rounding!(ROUNDING);
9011        vfmaddsubpdround(a, b, c, ROUNDING)
9012    }
9013}
9014
9015/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9016///
9017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9023///
9024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9025#[inline]
9026#[target_feature(enable = "avx512f")]
9027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9028#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9029#[rustc_legacy_const_generics(4)]
9030pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9031    a: __m512d,
9032    k: __mmask8,
9033    b: __m512d,
9034    c: __m512d,
9035) -> __m512d {
9036    unsafe {
9037        static_assert_rounding!(ROUNDING);
9038        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9039    }
9040}
9041
9042/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9043///
9044/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9045/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9046/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9047/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9048/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9049/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9052#[inline]
9053#[target_feature(enable = "avx512f")]
9054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9055#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9056#[rustc_legacy_const_generics(4)]
9057pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9058    k: __mmask8,
9059    a: __m512d,
9060    b: __m512d,
9061    c: __m512d,
9062) -> __m512d {
9063    unsafe {
9064        static_assert_rounding!(ROUNDING);
9065        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9066    }
9067}
9068
9069/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9070///
9071/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9077///
9078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9079#[inline]
9080#[target_feature(enable = "avx512f")]
9081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9082#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9083#[rustc_legacy_const_generics(4)]
9084pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9085    a: __m512d,
9086    b: __m512d,
9087    c: __m512d,
9088    k: __mmask8,
9089) -> __m512d {
9090    unsafe {
9091        static_assert_rounding!(ROUNDING);
9092        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9093    }
9094}
9095
9096/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9097///
9098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9109#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9110#[rustc_legacy_const_generics(3)]
9111pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9112    unsafe {
9113        static_assert_rounding!(ROUNDING);
9114        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9115    }
9116}
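
// Illustrative sketch, not part of the original source: `fmsubadd` is the mirror
// of `fmaddsub` -- even-indexed lanes compute `a * b + c`, odd-indexed lanes
// compute `a * b - c` -- and the code above realises it by negating `c` and
// reusing the `fmaddsub` primitive. Assuming `avx512f` is available:
//
//     #[target_feature(enable = "avx512f")]
//     fn fmsubadd_truncate(a: __m512, b: __m512, c: __m512) -> __m512 {
//         // truncate (round toward zero) and suppress exceptions
//         _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
//     }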
9117
9118/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9119///
9120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9126///
9127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9128#[inline]
9129#[target_feature(enable = "avx512f")]
9130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9131#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9132#[rustc_legacy_const_generics(4)]
9133pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9134    a: __m512,
9135    k: __mmask16,
9136    b: __m512,
9137    c: __m512,
9138) -> __m512 {
9139    unsafe {
9140        static_assert_rounding!(ROUNDING);
9141        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9142        simd_select_bitmask(k, r, a)
9143    }
9144}
9145
9146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9147///
9148/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9149/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9150/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9151/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9152/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9154///
9155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9156#[inline]
9157#[target_feature(enable = "avx512f")]
9158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9159#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9160#[rustc_legacy_const_generics(4)]
9161pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9162    k: __mmask16,
9163    a: __m512,
9164    b: __m512,
9165    c: __m512,
9166) -> __m512 {
9167    unsafe {
9168        static_assert_rounding!(ROUNDING);
9169        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9170        simd_select_bitmask(k, r, _mm512_setzero_ps())
9171    }
9172}
9173
9174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9175///
9176/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9177/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9178/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9179/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9180/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9182///
9183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9184#[inline]
9185#[target_feature(enable = "avx512f")]
9186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9187#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9188#[rustc_legacy_const_generics(4)]
9189pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9190    a: __m512,
9191    b: __m512,
9192    c: __m512,
9193    k: __mmask16,
9194) -> __m512 {
9195    unsafe {
9196        static_assert_rounding!(ROUNDING);
9197        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9198        simd_select_bitmask(k, r, c)
9199    }
9200}
9201
9202/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9203///
9204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9212#[inline]
9213#[target_feature(enable = "avx512f")]
9214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9215#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9218    a: __m512d,
9219    b: __m512d,
9220    c: __m512d,
9221) -> __m512d {
9222    unsafe {
9223        static_assert_rounding!(ROUNDING);
9224        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9225    }
9226}
9227
9228/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9229///
9230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9236///
9237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9238#[inline]
9239#[target_feature(enable = "avx512f")]
9240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9241#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9242#[rustc_legacy_const_generics(4)]
9243pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9244    a: __m512d,
9245    k: __mmask8,
9246    b: __m512d,
9247    c: __m512d,
9248) -> __m512d {
9249    unsafe {
9250        static_assert_rounding!(ROUNDING);
9251        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9252        simd_select_bitmask(k, r, a)
9253    }
9254}
9255
9256/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9257///
9258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9264///
9265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9266#[inline]
9267#[target_feature(enable = "avx512f")]
9268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9269#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9270#[rustc_legacy_const_generics(4)]
9271pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9272    k: __mmask8,
9273    a: __m512d,
9274    b: __m512d,
9275    c: __m512d,
9276) -> __m512d {
9277    unsafe {
9278        static_assert_rounding!(ROUNDING);
9279        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9280        simd_select_bitmask(k, r, _mm512_setzero_pd())
9281    }
9282}
9283
9284/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9285///
9286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9294#[inline]
9295#[target_feature(enable = "avx512f")]
9296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9297#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9298#[rustc_legacy_const_generics(4)]
9299pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9300    a: __m512d,
9301    b: __m512d,
9302    c: __m512d,
9303    k: __mmask8,
9304) -> __m512d {
9305    unsafe {
9306        static_assert_rounding!(ROUNDING);
9307        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9308        simd_select_bitmask(k, r, c)
9309    }
9310}
9311
9312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9313///
9314/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9322#[inline]
9323#[target_feature(enable = "avx512f")]
9324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9325#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9326#[rustc_legacy_const_generics(3)]
9327pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9328    unsafe {
9329        static_assert_rounding!(ROUNDING);
9330        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9331    }
9332}
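
// Illustrative sketch, not part of the original source: `fnmadd` computes
// `-(a * b) + c` in a single fused operation, which the code above expresses as
// `fma(-a, b, c)`. Assuming `avx512f` is available:
//
//     #[target_feature(enable = "avx512f")]
//     fn fnmadd_round_down(a: __m512, b: __m512, c: __m512) -> __m512 {
//         // round toward negative infinity and suppress exceptions
//         _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b, c)
//     }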
9333
9334/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9335///
9336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9348#[rustc_legacy_const_generics(4)]
9349pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9350    a: __m512,
9351    k: __mmask16,
9352    b: __m512,
9353    c: __m512,
9354) -> __m512 {
9355    unsafe {
9356        static_assert_rounding!(ROUNDING);
9357        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9358        simd_select_bitmask(k, r, a)
9359    }
9360}
9361
9362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9363///
9364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9370///
9371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9372#[inline]
9373#[target_feature(enable = "avx512f")]
9374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9375#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9376#[rustc_legacy_const_generics(4)]
9377pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9378    k: __mmask16,
9379    a: __m512,
9380    b: __m512,
9381    c: __m512,
9382) -> __m512 {
9383    unsafe {
9384        static_assert_rounding!(ROUNDING);
9385        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9386        simd_select_bitmask(k, r, _mm512_setzero_ps())
9387    }
9388}
9389
9390/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9391///
9392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9398///
9399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9400#[inline]
9401#[target_feature(enable = "avx512f")]
9402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9403#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9404#[rustc_legacy_const_generics(4)]
9405pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9406    a: __m512,
9407    b: __m512,
9408    c: __m512,
9409    k: __mmask16,
9410) -> __m512 {
9411    unsafe {
9412        static_assert_rounding!(ROUNDING);
9413        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9414        simd_select_bitmask(k, r, c)
9415    }
9416}
9417
9418/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9419///
9420/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9426///
9427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9428#[inline]
9429#[target_feature(enable = "avx512f")]
9430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9431#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9432#[rustc_legacy_const_generics(3)]
9433pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9434    unsafe {
9435        static_assert_rounding!(ROUNDING);
9436        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9437    }
9438}
9439
9440/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9441///
9442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9448///
9449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9450#[inline]
9451#[target_feature(enable = "avx512f")]
9452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9453#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9454#[rustc_legacy_const_generics(4)]
9455pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9456    a: __m512d,
9457    k: __mmask8,
9458    b: __m512d,
9459    c: __m512d,
9460) -> __m512d {
9461    unsafe {
9462        static_assert_rounding!(ROUNDING);
9463        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9464        simd_select_bitmask(k, r, a)
9465    }
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9481#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9482#[rustc_legacy_const_generics(4)]
9483pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9484    k: __mmask8,
9485    a: __m512d,
9486    b: __m512d,
9487    c: __m512d,
9488) -> __m512d {
9489    unsafe {
9490        static_assert_rounding!(ROUNDING);
9491        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9492        simd_select_bitmask(k, r, _mm512_setzero_pd())
9493    }
9494}
9495
9496/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9497///
9498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9504///
9505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9506#[inline]
9507#[target_feature(enable = "avx512f")]
9508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9509#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9510#[rustc_legacy_const_generics(4)]
9511pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9512    a: __m512d,
9513    b: __m512d,
9514    c: __m512d,
9515    k: __mmask8,
9516) -> __m512d {
9517    unsafe {
9518        static_assert_rounding!(ROUNDING);
9519        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9520        simd_select_bitmask(k, r, c)
9521    }
9522}
9523
9524/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9525///
9526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9532///
9533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9534#[inline]
9535#[target_feature(enable = "avx512f")]
9536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9537#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9538#[rustc_legacy_const_generics(3)]
9539pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9540    unsafe {
9541        static_assert_rounding!(ROUNDING);
9542        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9543    }
9544}
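
// Illustrative sketch, not part of the original source: `fnmsub` computes
// `-(a * b) - c`, expressed above as `fma(-a, b, -c)`. Assuming `avx512f` is
// available, using the current MXCSR rounding mode:
//
//     #[target_feature(enable = "avx512f")]
//     fn fnmsub_current_mode(a: __m512, b: __m512, c: __m512) -> __m512 {
//         _mm512_fnmsub_round_ps::<{ _MM_FROUND_CUR_DIRECTION }>(a, b, c)
//     }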
9545
9546/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9547///
9548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9549/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9550/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9551/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9552/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9553/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9554///
9555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9556#[inline]
9557#[target_feature(enable = "avx512f")]
9558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9559#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9560#[rustc_legacy_const_generics(4)]
9561pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9562    a: __m512,
9563    k: __mmask16,
9564    b: __m512,
9565    c: __m512,
9566) -> __m512 {
9567    unsafe {
9568        static_assert_rounding!(ROUNDING);
9569        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9570        simd_select_bitmask(k, r, a)
9571    }
9572}
9573
9574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9575///
9576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9582///
9583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9584#[inline]
9585#[target_feature(enable = "avx512f")]
9586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9587#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9588#[rustc_legacy_const_generics(4)]
9589pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9590    k: __mmask16,
9591    a: __m512,
9592    b: __m512,
9593    c: __m512,
9594) -> __m512 {
9595    unsafe {
9596        static_assert_rounding!(ROUNDING);
9597        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9598        simd_select_bitmask(k, r, _mm512_setzero_ps())
9599    }
9600}
9601
9602/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9603///
9604/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9610///
9611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9612#[inline]
9613#[target_feature(enable = "avx512f")]
9614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9615#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9616#[rustc_legacy_const_generics(4)]
9617pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9618    a: __m512,
9619    b: __m512,
9620    c: __m512,
9621    k: __mmask16,
9622) -> __m512 {
9623    unsafe {
9624        static_assert_rounding!(ROUNDING);
9625        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9626        simd_select_bitmask(k, r, c)
9627    }
9628}
9629
9630/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9631///
9632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9640#[inline]
9641#[target_feature(enable = "avx512f")]
9642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9643#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9644#[rustc_legacy_const_generics(3)]
9645pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9646    unsafe {
9647        static_assert_rounding!(ROUNDING);
9648        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9649    }
9650}
9651
9652/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9653///
9654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9660///
9661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9662#[inline]
9663#[target_feature(enable = "avx512f")]
9664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9665#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9666#[rustc_legacy_const_generics(4)]
9667pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9668    a: __m512d,
9669    k: __mmask8,
9670    b: __m512d,
9671    c: __m512d,
9672) -> __m512d {
9673    unsafe {
9674        static_assert_rounding!(ROUNDING);
9675        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9676        simd_select_bitmask(k, r, a)
9677    }
9678}
9679
9680/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9681///
9682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9688///
9689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9690#[inline]
9691#[target_feature(enable = "avx512f")]
9692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9693#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9694#[rustc_legacy_const_generics(4)]
9695pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9696    k: __mmask8,
9697    a: __m512d,
9698    b: __m512d,
9699    c: __m512d,
9700) -> __m512d {
9701    unsafe {
9702        static_assert_rounding!(ROUNDING);
9703        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9704        simd_select_bitmask(k, r, _mm512_setzero_pd())
9705    }
9706}
9707
9708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9709///
9710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9718#[inline]
9719#[target_feature(enable = "avx512f")]
9720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9721#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9722#[rustc_legacy_const_generics(4)]
9723pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9724    a: __m512d,
9725    b: __m512d,
9726    c: __m512d,
9727    k: __mmask8,
9728) -> __m512d {
9729    unsafe {
9730        static_assert_rounding!(ROUNDING);
9731        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9732        simd_select_bitmask(k, r, c)
9733    }
9734}
9735
9736/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9737/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
9740#[inline]
9741#[target_feature(enable = "avx512f")]
9742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9743#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9744#[rustc_legacy_const_generics(2)]
9745pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9746    unsafe {
9747        static_assert_sae!(SAE);
9748        let a = a.as_f32x16();
9749        let b = b.as_f32x16();
9750        let r = vmaxps(a, b, SAE);
9751        transmute(r)
9752    }
9753}
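
// Illustrative sketch, not part of the original source: the `_round` max/min
// variants take a SAE ("suppress all exceptions") parameter rather than a rounding
// mode, so the only meaningful values are `_MM_FROUND_CUR_DIRECTION` and
// `_MM_FROUND_NO_EXC`. Assuming `avx512f` is available, an exception-free maximum:
//
//     #[target_feature(enable = "avx512f")]
//     fn max_no_exc(a: __m512, b: __m512) -> __m512 {
//         _mm512_max_round_ps::<{ _MM_FROUND_NO_EXC }>(a, b)
//     }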
9754
9755/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9756/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9762#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_mask_max_round_ps<const SAE: i32>(
9765    src: __m512,
9766    k: __mmask16,
9767    a: __m512,
9768    b: __m512,
9769) -> __m512 {
9770    unsafe {
9771        static_assert_sae!(SAE);
9772        let a = a.as_f32x16();
9773        let b = b.as_f32x16();
9774        let r = vmaxps(a, b, SAE);
9775        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9776    }
9777}
9778
9779/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
9783#[inline]
9784#[target_feature(enable = "avx512f")]
9785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9786#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9787#[rustc_legacy_const_generics(3)]
9788pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9789    unsafe {
9790        static_assert_sae!(SAE);
9791        let a = a.as_f32x16();
9792        let b = b.as_f32x16();
9793        let r = vmaxps(a, b, SAE);
9794        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9795    }
9796}
9797
9798/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9799/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9802#[inline]
9803#[target_feature(enable = "avx512f")]
9804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9805#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9806#[rustc_legacy_const_generics(2)]
9807pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9808    unsafe {
9809        static_assert_sae!(SAE);
9810        let a = a.as_f64x8();
9811        let b = b.as_f64x8();
9812        let r = vmaxpd(a, b, SAE);
9813        transmute(r)
9814    }
9815}
9816
9817/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9819///
9820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
9821#[inline]
9822#[target_feature(enable = "avx512f")]
9823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9824#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9825#[rustc_legacy_const_generics(4)]
9826pub fn _mm512_mask_max_round_pd<const SAE: i32>(
9827    src: __m512d,
9828    k: __mmask8,
9829    a: __m512d,
9830    b: __m512d,
9831) -> __m512d {
9832    unsafe {
9833        static_assert_sae!(SAE);
9834        let a = a.as_f64x8();
9835        let b = b.as_f64x8();
9836        let r = vmaxpd(a, b, SAE);
9837        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9838    }
9839}
9840
9841/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9842/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9843///
9844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9845#[inline]
9846#[target_feature(enable = "avx512f")]
9847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9848#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9849#[rustc_legacy_const_generics(3)]
9850pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9851    unsafe {
9852        static_assert_sae!(SAE);
9853        let a = a.as_f64x8();
9854        let b = b.as_f64x8();
9855        let r = vmaxpd(a, b, SAE);
9856        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9857    }
9858}
9859
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870    unsafe {
9871        static_assert_sae!(SAE);
9872        let a = a.as_f32x16();
9873        let b = b.as_f32x16();
9874        let r = vminps(a, b, SAE);
9875        transmute(r)
9876    }
9877}
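
// Illustrative sketch, not part of the original source: `min` mirrors `max`, and
// the zero-masked form defined below keeps only the lanes of interest. Assuming
// `avx512f` is available:
//
//     #[target_feature(enable = "avx512f")]
//     fn min_low_half(a: __m512, b: __m512) -> __m512 {
//         // lanes 0..8 hold the per-lane minimum, lanes 8..16 are zeroed
//         _mm512_maskz_min_round_ps::<{ _MM_FROUND_NO_EXC }>(0x00ff, a, b)
//     }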
9878
9879/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9880/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9881///
9882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
9883#[inline]
9884#[target_feature(enable = "avx512f")]
9885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9886#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9887#[rustc_legacy_const_generics(4)]
9888pub fn _mm512_mask_min_round_ps<const SAE: i32>(
9889    src: __m512,
9890    k: __mmask16,
9891    a: __m512,
9892    b: __m512,
9893) -> __m512 {
9894    unsafe {
9895        static_assert_sae!(SAE);
9896        let a = a.as_f32x16();
9897        let b = b.as_f32x16();
9898        let r = vminps(a, b, SAE);
9899        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9900    }
9901}
9902
9903/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
9907#[inline]
9908#[target_feature(enable = "avx512f")]
9909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9910#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9911#[rustc_legacy_const_generics(3)]
9912pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9913    unsafe {
9914        static_assert_sae!(SAE);
9915        let a = a.as_f32x16();
9916        let b = b.as_f32x16();
9917        let r = vminps(a, b, SAE);
9918        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9919    }
9920}
9921
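// Illustrative sketch, not part of the upstream source: the three `min_round_ps`
// variants above differ only in how masked-off lanes are filled. The helper name
// and the choice of `_MM_FROUND_NO_EXC` are assumptions made for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn min_round_ps_variants_demo(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> (__m512, __m512, __m512) {
    // Plain: every lane holds min(a, b).
    let plain = _mm512_min_round_ps::<_MM_FROUND_NO_EXC>(a, b);
    // Writemask: lanes with a clear mask bit are copied from `src`.
    let merged = _mm512_mask_min_round_ps::<_MM_FROUND_NO_EXC>(src, k, a, b);
    // Zeromask: lanes with a clear mask bit are zeroed.
    let zeroed = _mm512_maskz_min_round_ps::<_MM_FROUND_NO_EXC>(k, a, b);
    (plain, merged, zeroed)
}
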
9922/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9923/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9924///
9925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9926#[inline]
9927#[target_feature(enable = "avx512f")]
9928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9929#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9930#[rustc_legacy_const_generics(2)]
9931pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9932    unsafe {
9933        static_assert_sae!(SAE);
9934        let a = a.as_f64x8();
9935        let b = b.as_f64x8();
9936        let r = vminpd(a, b, SAE);
9937        transmute(r)
9938    }
9939}
9940
9941/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9948#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9949#[rustc_legacy_const_generics(4)]
9950pub fn _mm512_mask_min_round_pd<const SAE: i32>(
9951    src: __m512d,
9952    k: __mmask8,
9953    a: __m512d,
9954    b: __m512d,
9955) -> __m512d {
9956    unsafe {
9957        static_assert_sae!(SAE);
9958        let a = a.as_f64x8();
9959        let b = b.as_f64x8();
9960        let r = vminpd(a, b, SAE);
9961        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9962    }
9963}
9964
9965/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
9969#[inline]
9970#[target_feature(enable = "avx512f")]
9971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9972#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9973#[rustc_legacy_const_generics(3)]
9974pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9975    unsafe {
9976        static_assert_sae!(SAE);
9977        let a = a.as_f64x8();
9978        let b = b.as_f64x8();
9979        let r = vminpd(a, b, SAE);
9980        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9981    }
9982}
9983
9984/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9985/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9986///
9987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
9988#[inline]
9989#[target_feature(enable = "avx512f")]
9990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9991#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9992#[rustc_legacy_const_generics(1)]
9993pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9994    unsafe {
9995        static_assert_sae!(SAE);
9996        let a = a.as_f32x16();
9997        let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
9998        transmute(r)
9999    }
10000}
10001
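// Illustrative sketch, not part of the upstream source: the exponent is returned
// as a float, i.e. floor(log2(|x|)) for finite non-zero inputs. The helper name
// and the sample value are assumptions for demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn getexp_ps_demo() -> __m512 {
    // For 24.0 every result lane is 4.0, since 2^4 <= 24 < 2^5.
    let a = _mm512_set1_ps(24.0);
    _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a)
}
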
10002/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10003/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10004///
10005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10006#[inline]
10007#[target_feature(enable = "avx512f")]
10008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10009#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10010#[rustc_legacy_const_generics(3)]
10011pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10012    unsafe {
10013        static_assert_sae!(SAE);
10014        let a = a.as_f32x16();
10015        let src = src.as_f32x16();
10016        let r = vgetexpps(a, src, k, SAE);
10017        transmute(r)
10018    }
10019}
10020
10021/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10023///
10024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10025#[inline]
10026#[target_feature(enable = "avx512f")]
10027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10028#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10029#[rustc_legacy_const_generics(2)]
10030pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10031    unsafe {
10032        static_assert_sae!(SAE);
10033        let a = a.as_f32x16();
10034        let r = vgetexpps(a, f32x16::ZERO, k, SAE);
10035        transmute(r)
10036    }
10037}
10038
10039/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10043#[inline]
10044#[target_feature(enable = "avx512f")]
10045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10046#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10047#[rustc_legacy_const_generics(1)]
10048pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10049    unsafe {
10050        static_assert_sae!(SAE);
10051        let a = a.as_f64x8();
10052        let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
10053        transmute(r)
10054    }
10055}
10056
10057/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10059///
10060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10061#[inline]
10062#[target_feature(enable = "avx512f")]
10063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10064#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10065#[rustc_legacy_const_generics(3)]
10066pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10067    src: __m512d,
10068    k: __mmask8,
10069    a: __m512d,
10070) -> __m512d {
10071    unsafe {
10072        static_assert_sae!(SAE);
10073        let a = a.as_f64x8();
10074        let src = src.as_f64x8();
10075        let r = vgetexppd(a, src, k, SAE);
10076        transmute(r)
10077    }
10078}
10079
10080/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10082///
10083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10084#[inline]
10085#[target_feature(enable = "avx512f")]
10086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10087#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10088#[rustc_legacy_const_generics(2)]
10089pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10090    unsafe {
10091        static_assert_sae!(SAE);
10092        let a = a.as_f64x8();
10093        let r = vgetexppd(a, f64x8::ZERO, k, SAE);
10094        transmute(r)
10095    }
10096}
10097
10098/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10099/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
10108#[inline]
10109#[target_feature(enable = "avx512f")]
10110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10111#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10112#[rustc_legacy_const_generics(1, 2)]
10113pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10114    unsafe {
10115        static_assert_uimm_bits!(IMM8, 8);
10116        static_assert_mantissas_sae!(SAE);
10117        let a = a.as_f32x16();
10118        let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
10119        transmute(r)
10120    }
10121}
10122
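// Illustrative sketch, not part of the upstream source: per Intel's description of
// VRNDSCALE, imm8[7:4] selects how many fraction bits to keep (M) and imm8[2:0]
// selects the rounding mode, so the result is round(x * 2^M) / 2^M. The helper
// name and sample value are assumptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn roundscale_ps_demo() -> __m512 {
    // Keep one fraction bit (M = 1) and truncate: 2.7 becomes 2.5 in every lane.
    const IMM8: i32 = (1 << 4) | _MM_FROUND_TO_ZERO;
    let a = _mm512_set1_ps(2.7);
    _mm512_roundscale_round_ps::<IMM8, _MM_FROUND_NO_EXC>(a)
}
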
10123/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10124/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10125/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10126/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10127/// * [`_MM_FROUND_TO_POS_INF`] : round up
10128/// * [`_MM_FROUND_TO_ZERO`] : truncate
10129/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10130///
10131/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10136#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10137#[rustc_legacy_const_generics(3, 4)]
10138pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10139    src: __m512,
10140    k: __mmask16,
10141    a: __m512,
10142) -> __m512 {
10143    unsafe {
10144        static_assert_uimm_bits!(IMM8, 8);
10145        static_assert_mantissas_sae!(SAE);
10146        let a = a.as_f32x16();
10147        let src = src.as_f32x16();
10148        let r = vrndscaleps(a, IMM8, src, k, SAE);
10149        transmute(r)
10150    }
10151}
10152
10153/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10154/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10155/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10156/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10157/// * [`_MM_FROUND_TO_POS_INF`] : round up
10158/// * [`_MM_FROUND_TO_ZERO`] : truncate
10159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10160///
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10163#[inline]
10164#[target_feature(enable = "avx512f")]
10165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10166#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10167#[rustc_legacy_const_generics(2, 3)]
10168pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10169    k: __mmask16,
10170    a: __m512,
10171) -> __m512 {
10172    unsafe {
10173        static_assert_uimm_bits!(IMM8, 8);
10174        static_assert_mantissas_sae!(SAE);
10175        let a = a.as_f32x16();
10176        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
10177        transmute(r)
10178    }
10179}
10180
10181/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10185/// * [`_MM_FROUND_TO_POS_INF`] : round up
10186/// * [`_MM_FROUND_TO_ZERO`] : truncate
10187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10188///
10189/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10191#[inline]
10192#[target_feature(enable = "avx512f")]
10193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10194#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10195#[rustc_legacy_const_generics(1, 2)]
10196pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10197    unsafe {
10198        static_assert_uimm_bits!(IMM8, 8);
10199        static_assert_mantissas_sae!(SAE);
10200        let a = a.as_f64x8();
10201        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
10202        transmute(r)
10203    }
10204}
10205
10206/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10207/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10208/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10209/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10210/// * [`_MM_FROUND_TO_POS_INF`] : round up
10211/// * [`_MM_FROUND_TO_ZERO`] : truncate
10212/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10213///
10214/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10216#[inline]
10217#[target_feature(enable = "avx512f")]
10218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10219#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10220#[rustc_legacy_const_generics(3, 4)]
10221pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10222    src: __m512d,
10223    k: __mmask8,
10224    a: __m512d,
10225) -> __m512d {
10226    unsafe {
10227        static_assert_uimm_bits!(IMM8, 8);
10228        static_assert_mantissas_sae!(SAE);
10229        let a = a.as_f64x8();
10230        let src = src.as_f64x8();
10231        let r = vrndscalepd(a, IMM8, src, k, SAE);
10232        transmute(r)
10233    }
10234}
10235
10236/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10240/// * [`_MM_FROUND_TO_POS_INF`] : round up
10241/// * [`_MM_FROUND_TO_ZERO`] : truncate
10242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10243///
10244/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10246#[inline]
10247#[target_feature(enable = "avx512f")]
10248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10249#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10250#[rustc_legacy_const_generics(2, 3)]
10251pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10252    k: __mmask8,
10253    a: __m512d,
10254) -> __m512d {
10255    unsafe {
10256        static_assert_uimm_bits!(IMM8, 8);
10257        static_assert_mantissas_sae!(SAE);
10258        let a = a.as_f64x8();
10259        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
10260        transmute(r)
10261    }
10262}
10263
10264/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10265///
10266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
10274#[inline]
10275#[target_feature(enable = "avx512f")]
10276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10277#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10278#[rustc_legacy_const_generics(2)]
10279pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10280    unsafe {
10281        static_assert_rounding!(ROUNDING);
10282        let a = a.as_f32x16();
10283        let b = b.as_f32x16();
10284        let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
10285        transmute(r)
10286    }
10287}
10288
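// Illustrative sketch, not part of the upstream source: the scale operation
// computes a * 2^floor(b) per lane, which is why a full rounding-mode constant
// (here round-to-nearest with exception suppression) is accepted. The helper name
// and sample values are assumptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn scalef_ps_demo() -> __m512 {
    // 3.0 * 2^4 = 48.0 in every lane.
    let a = _mm512_set1_ps(3.0);
    let b = _mm512_set1_ps(4.0);
    _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
}
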
10289/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10290///
10291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10297///
10298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10299#[inline]
10300#[target_feature(enable = "avx512f")]
10301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10302#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10303#[rustc_legacy_const_generics(4)]
10304pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10305    src: __m512,
10306    k: __mmask16,
10307    a: __m512,
10308    b: __m512,
10309) -> __m512 {
10310    unsafe {
10311        static_assert_rounding!(ROUNDING);
10312        let a = a.as_f32x16();
10313        let b = b.as_f32x16();
10314        let src = src.as_f32x16();
10315        let r = vscalefps(a, b, src, k, ROUNDING);
10316        transmute(r)
10317    }
10318}
10319
10320/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10321///
10322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10333#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10334#[rustc_legacy_const_generics(3)]
10335pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10336    k: __mmask16,
10337    a: __m512,
10338    b: __m512,
10339) -> __m512 {
10340    unsafe {
10341        static_assert_rounding!(ROUNDING);
10342        let a = a.as_f32x16();
10343        let b = b.as_f32x16();
10344        let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
10345        transmute(r)
10346    }
10347}
10348
10349/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10350///
10351/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10352/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10353/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10354/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10355/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10359#[inline]
10360#[target_feature(enable = "avx512f")]
10361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10362#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10363#[rustc_legacy_const_generics(2)]
10364pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10365    unsafe {
10366        static_assert_rounding!(ROUNDING);
10367        let a = a.as_f64x8();
10368        let b = b.as_f64x8();
10369        let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
10370        transmute(r)
10371    }
10372}
10373
10374/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10375///
10376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10384#[inline]
10385#[target_feature(enable = "avx512f")]
10386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10387#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10388#[rustc_legacy_const_generics(4)]
10389pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10390    src: __m512d,
10391    k: __mmask8,
10392    a: __m512d,
10393    b: __m512d,
10394) -> __m512d {
10395    unsafe {
10396        static_assert_rounding!(ROUNDING);
10397        let a = a.as_f64x8();
10398        let b = b.as_f64x8();
10399        let src = src.as_f64x8();
10400        let r = vscalefpd(a, b, src, k, ROUNDING);
10401        transmute(r)
10402    }
10403}
10404
10405/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10406///
10407/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10415#[inline]
10416#[target_feature(enable = "avx512f")]
10417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10418#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10419#[rustc_legacy_const_generics(3)]
10420pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10421    k: __mmask8,
10422    a: __m512d,
10423    b: __m512d,
10424) -> __m512d {
10425    unsafe {
10426        static_assert_rounding!(ROUNDING);
10427        let a = a.as_f64x8();
10428        let b = b.as_f64x8();
10429        let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
10430        transmute(r)
10431    }
10432}
10433
10434/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10435///
10436/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
10438#[inline]
10439#[target_feature(enable = "avx512f")]
10440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10441#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10442#[rustc_legacy_const_generics(3, 4)]
10443pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10444    a: __m512,
10445    b: __m512,
10446    c: __m512i,
10447) -> __m512 {
10448    unsafe {
10449        static_assert_uimm_bits!(IMM8, 8);
10450        static_assert_mantissas_sae!(SAE);
10451        let a = a.as_f32x16();
10452        let b = b.as_f32x16();
10453        let c = c.as_i32x16();
10454        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
10455        transmute(r)
10456    }
10457}
10458
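// Illustrative sketch, not part of the upstream source: each 4-bit field of the
// table in `c` chooses what to substitute for a given input class. Per Intel's
// token-response table, an all-zero table keeps the corresponding element of `a`
// unchanged, which makes a convenient starting point before filling in entries
// for NaN/zero/infinity. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn fixupimm_ps_passthrough_demo(a: __m512, b: __m512) -> __m512 {
    // IMM8 = 0 requests no extra exception reporting for the special-case tokens.
    let table = _mm512_setzero_si512();
    _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, table)
}
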
10459/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10460///
10461/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10463#[inline]
10464#[target_feature(enable = "avx512f")]
10465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10466#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10467#[rustc_legacy_const_generics(4, 5)]
10468pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10469    a: __m512,
10470    k: __mmask16,
10471    b: __m512,
10472    c: __m512i,
10473) -> __m512 {
10474    unsafe {
10475        static_assert_uimm_bits!(IMM8, 8);
10476        static_assert_mantissas_sae!(SAE);
10477        let a = a.as_f32x16();
10478        let b = b.as_f32x16();
10479        let c = c.as_i32x16();
10480        let r = vfixupimmps(a, b, c, IMM8, k, SAE);
10481        transmute(r)
10482    }
10483}
10484
10485/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10486///
10487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10489#[inline]
10490#[target_feature(enable = "avx512f")]
10491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10492#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10493#[rustc_legacy_const_generics(4, 5)]
10494pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10495    k: __mmask16,
10496    a: __m512,
10497    b: __m512,
10498    c: __m512i,
10499) -> __m512 {
10500    unsafe {
10501        static_assert_uimm_bits!(IMM8, 8);
10502        static_assert_mantissas_sae!(SAE);
10503        let a = a.as_f32x16();
10504        let b = b.as_f32x16();
10505        let c = c.as_i32x16();
10506        let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
10507        transmute(r)
10508    }
10509}
10510
10511/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10512///
10513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10518#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10519#[rustc_legacy_const_generics(3, 4)]
10520pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10521    a: __m512d,
10522    b: __m512d,
10523    c: __m512i,
10524) -> __m512d {
10525    unsafe {
10526        static_assert_uimm_bits!(IMM8, 8);
10527        static_assert_mantissas_sae!(SAE);
10528        let a = a.as_f64x8();
10529        let b = b.as_f64x8();
10530        let c = c.as_i64x8();
10531        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
10532        transmute(r)
10533    }
10534}
10535
10536/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10537///
10538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10540#[inline]
10541#[target_feature(enable = "avx512f")]
10542#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10543#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(4, 5)]
10545pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10546    a: __m512d,
10547    k: __mmask8,
10548    b: __m512d,
10549    c: __m512i,
10550) -> __m512d {
10551    unsafe {
10552        static_assert_uimm_bits!(IMM8, 8);
10553        static_assert_mantissas_sae!(SAE);
10554        let a = a.as_f64x8();
10555        let b = b.as_f64x8();
10556        let c = c.as_i64x8();
10557        let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
10558        transmute(r)
10559    }
10560}
10561
10562/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10563///
10564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10566#[inline]
10567#[target_feature(enable = "avx512f")]
10568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10569#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10570#[rustc_legacy_const_generics(4, 5)]
10571pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10572    k: __mmask8,
10573    a: __m512d,
10574    b: __m512d,
10575    c: __m512i,
10576) -> __m512d {
10577    unsafe {
10578        static_assert_uimm_bits!(IMM8, 8);
10579        static_assert_mantissas_sae!(SAE);
10580        let a = a.as_f64x8();
10581        let b = b.as_f64x8();
10582        let c = c.as_i64x8();
10583        let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
10584        transmute(r)
10585    }
10586}
10587
10588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10590///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10591///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10592///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10593///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10594/// The sign is determined by sc which can take the following values:\
10595///    _MM_MANT_SIGN_src     // sign = sign(src)\
10596///    _MM_MANT_SIGN_zero    // sign = 0\
10597///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10598/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
10601#[inline]
10602#[target_feature(enable = "avx512f")]
10603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10604#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10605#[rustc_legacy_const_generics(1, 2, 3)]
10606pub fn _mm512_getmant_round_ps<
10607    const NORM: _MM_MANTISSA_NORM_ENUM,
10608    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10609    const SAE: i32,
10610>(
10611    a: __m512,
10612) -> __m512 {
10613    unsafe {
10614        static_assert_uimm_bits!(NORM, 4);
10615        static_assert_uimm_bits!(SIGN, 2);
10616        static_assert_mantissas_sae!(SAE);
10617        let a = a.as_f32x16();
10618        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
10619        transmute(r)
10620    }
10621}
10622
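// Illustrative sketch, not part of the upstream source: with NORM = 0 (interval
// [1, 2)) and SIGN = 0 (keep the source sign) the intrinsic returns each value's
// normalized significand. The literal const arguments stand in for the enum
// constants listed in the doc comment above; the helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn getmant_ps_demo() -> __m512 {
    // 24.0 = 1.5 * 2^4, so every result lane is 1.5.
    let a = _mm512_set1_ps(24.0);
    _mm512_getmant_round_ps::<0, 0, _MM_FROUND_NO_EXC>(a)
}
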
10623/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10624/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10625///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10626///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10627///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10628///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10629/// The sign is determined by sc which can take the following values:\
10630///    _MM_MANT_SIGN_src     // sign = sign(src)\
10631///    _MM_MANT_SIGN_zero    // sign = 0\
10632///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10633/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10636#[inline]
10637#[target_feature(enable = "avx512f")]
10638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10639#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10640#[rustc_legacy_const_generics(3, 4, 5)]
10641pub fn _mm512_mask_getmant_round_ps<
10642    const NORM: _MM_MANTISSA_NORM_ENUM,
10643    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10644    const SAE: i32,
10645>(
10646    src: __m512,
10647    k: __mmask16,
10648    a: __m512,
10649) -> __m512 {
10650    unsafe {
10651        static_assert_uimm_bits!(NORM, 4);
10652        static_assert_uimm_bits!(SIGN, 2);
10653        static_assert_mantissas_sae!(SAE);
10654        let a = a.as_f32x16();
10655        let src = src.as_f32x16();
10656        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
10657        transmute(r)
10658    }
10659}
10660
10661/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10662/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10663///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10664///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10665///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10666///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10667/// The sign is determined by sc which can take the following values:\
10668///    _MM_MANT_SIGN_src     // sign = sign(src)\
10669///    _MM_MANT_SIGN_zero    // sign = 0\
10670///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10672///
10673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10674#[inline]
10675#[target_feature(enable = "avx512f")]
10676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10677#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10678#[rustc_legacy_const_generics(2, 3, 4)]
10679pub fn _mm512_maskz_getmant_round_ps<
10680    const NORM: _MM_MANTISSA_NORM_ENUM,
10681    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10682    const SAE: i32,
10683>(
10684    k: __mmask16,
10685    a: __m512,
10686) -> __m512 {
10687    unsafe {
10688        static_assert_uimm_bits!(NORM, 4);
10689        static_assert_uimm_bits!(SIGN, 2);
10690        static_assert_mantissas_sae!(SAE);
10691        let a = a.as_f32x16();
10692        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
10693        transmute(r)
10694    }
10695}
10696
10697/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10698/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10699///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10700///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10701///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10702///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10703/// The sign is determined by sc which can take the following values:\
10704///    _MM_MANT_SIGN_src     // sign = sign(src)\
10705///    _MM_MANT_SIGN_zero    // sign = 0\
10706///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10707/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10713#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10714#[rustc_legacy_const_generics(1, 2, 3)]
10715pub fn _mm512_getmant_round_pd<
10716    const NORM: _MM_MANTISSA_NORM_ENUM,
10717    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10718    const SAE: i32,
10719>(
10720    a: __m512d,
10721) -> __m512d {
10722    unsafe {
10723        static_assert_uimm_bits!(NORM, 4);
10724        static_assert_uimm_bits!(SIGN, 2);
10725        static_assert_mantissas_sae!(SAE);
10726        let a = a.as_f64x8();
10727        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
10728        transmute(r)
10729    }
10730}
10731
10732/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10733/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10734///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10735///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10736///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10737///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10738/// The sign is determined by sc which can take the following values:\
10739///    _MM_MANT_SIGN_src     // sign = sign(src)\
10740///    _MM_MANT_SIGN_zero    // sign = 0\
10741///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743///
10744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10745#[inline]
10746#[target_feature(enable = "avx512f")]
10747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10748#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10749#[rustc_legacy_const_generics(3, 4, 5)]
10750pub fn _mm512_mask_getmant_round_pd<
10751    const NORM: _MM_MANTISSA_NORM_ENUM,
10752    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10753    const SAE: i32,
10754>(
10755    src: __m512d,
10756    k: __mmask8,
10757    a: __m512d,
10758) -> __m512d {
10759    unsafe {
10760        static_assert_uimm_bits!(NORM, 4);
10761        static_assert_uimm_bits!(SIGN, 2);
10762        static_assert_mantissas_sae!(SAE);
10763        let a = a.as_f64x8();
10764        let src = src.as_f64x8();
10765        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
10766        transmute(r)
10767    }
10768}
10769
10770/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10771/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10772///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10773///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10774///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10775///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10776/// The sign is determined by sc which can take the following values:\
10777///    _MM_MANT_SIGN_src     // sign = sign(src)\
10778///    _MM_MANT_SIGN_zero    // sign = 0\
10779///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10783#[inline]
10784#[target_feature(enable = "avx512f")]
10785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10786#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10787#[rustc_legacy_const_generics(2, 3, 4)]
10788pub fn _mm512_maskz_getmant_round_pd<
10789    const NORM: _MM_MANTISSA_NORM_ENUM,
10790    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10791    const SAE: i32,
10792>(
10793    k: __mmask8,
10794    a: __m512d,
10795) -> __m512d {
10796    unsafe {
10797        static_assert_uimm_bits!(NORM, 4);
10798        static_assert_uimm_bits!(SIGN, 2);
10799        static_assert_mantissas_sae!(SAE);
10800        let a = a.as_f64x8();
10801        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
10802        transmute(r)
10803    }
10804}
10805
10806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10807///
10808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
10809#[inline]
10810#[target_feature(enable = "avx512f")]
10811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10812#[cfg_attr(test, assert_instr(vcvtps2dq))]
10813pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10814    unsafe {
10815        transmute(vcvtps2dq(
10816            a.as_f32x16(),
10817            i32x16::ZERO,
10818            0b11111111_11111111,
10819            _MM_FROUND_CUR_DIRECTION,
10820        ))
10821    }
10822}
10823
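// Illustrative sketch, not part of the upstream source: this non-`_round_` variant
// converts using the current MXCSR rounding mode (round-to-nearest-even by
// default), so 2.5 converts to 2 rather than 3 under the default state. The
// helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtps_epi32_demo() -> __m512i {
    // With default MXCSR settings every lane converts to the integer 2.
    let a = _mm512_set1_ps(2.5);
    _mm512_cvtps_epi32(a)
}
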
10824/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10827#[inline]
10828#[target_feature(enable = "avx512f")]
10829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10830#[cfg_attr(test, assert_instr(vcvtps2dq))]
10831pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10832    unsafe {
10833        transmute(vcvtps2dq(
10834            a.as_f32x16(),
10835            src.as_i32x16(),
10836            k,
10837            _MM_FROUND_CUR_DIRECTION,
10838        ))
10839    }
10840}
10841
10842/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10843///
10844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10845#[inline]
10846#[target_feature(enable = "avx512f")]
10847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10848#[cfg_attr(test, assert_instr(vcvtps2dq))]
10849pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10850    unsafe {
10851        transmute(vcvtps2dq(
10852            a.as_f32x16(),
10853            i32x16::ZERO,
10854            k,
10855            _MM_FROUND_CUR_DIRECTION,
10856        ))
10857    }
10858}
10859
10860/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10861///
10862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
10863#[inline]
10864#[target_feature(enable = "avx512f,avx512vl")]
10865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10866#[cfg_attr(test, assert_instr(vcvtps2dq))]
10867pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10868    unsafe {
10869        let convert = _mm256_cvtps_epi32(a);
10870        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10871    }
10872}
10873
10874/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10877#[inline]
10878#[target_feature(enable = "avx512f,avx512vl")]
10879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10880#[cfg_attr(test, assert_instr(vcvtps2dq))]
10881pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10882    unsafe {
10883        let convert = _mm256_cvtps_epi32(a);
10884        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10885    }
10886}
10887
10888/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
10891#[inline]
10892#[target_feature(enable = "avx512f,avx512vl")]
10893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10894#[cfg_attr(test, assert_instr(vcvtps2dq))]
10895pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10896    unsafe {
10897        let convert = _mm_cvtps_epi32(a);
10898        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10899    }
10900}
10901
10902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10903///
10904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10905#[inline]
10906#[target_feature(enable = "avx512f,avx512vl")]
10907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10908#[cfg_attr(test, assert_instr(vcvtps2dq))]
10909pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10910    unsafe {
10911        let convert = _mm_cvtps_epi32(a);
10912        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10913    }
10914}
10915
10916/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10917///
10918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
10919#[inline]
10920#[target_feature(enable = "avx512f")]
10921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10922#[cfg_attr(test, assert_instr(vcvtps2udq))]
10923pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10924    unsafe {
10925        transmute(vcvtps2udq(
10926            a.as_f32x16(),
10927            u32x16::ZERO,
10928            0b11111111_11111111,
10929            _MM_FROUND_CUR_DIRECTION,
10930        ))
10931    }
10932}
10933
10934/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10935///
10936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10937#[inline]
10938#[target_feature(enable = "avx512f")]
10939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10940#[cfg_attr(test, assert_instr(vcvtps2udq))]
10941pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10942    unsafe {
10943        transmute(vcvtps2udq(
10944            a.as_f32x16(),
10945            src.as_u32x16(),
10946            k,
10947            _MM_FROUND_CUR_DIRECTION,
10948        ))
10949    }
10950}
10951
10952/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1757)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10958#[cfg_attr(test, assert_instr(vcvtps2udq))]
10959pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10960    unsafe {
10961        transmute(vcvtps2udq(
10962            a.as_f32x16(),
10963            u32x16::ZERO,
10964            k,
10965            _MM_FROUND_CUR_DIRECTION,
10966        ))
10967    }
10968}
10969
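// Illustrative sketch, not part of the upstream source: the conversion targets
// *unsigned* 32-bit integers, so negative inputs are out of range; one common
// pattern is to zero those lanes with the zeromask variant. The mask argument and
// helper name are assumptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn maskz_cvtps_epu32_demo(a: __m512, non_negative_lanes: __mmask16) -> __m512i {
    // Lanes whose mask bit is clear are forced to 0 instead of an out-of-range result.
    _mm512_maskz_cvtps_epu32(non_negative_lanes, a)
}
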
10970/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10971///
10972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10973#[inline]
10974#[target_feature(enable = "avx512f,avx512vl")]
10975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10976#[cfg_attr(test, assert_instr(vcvtps2udq))]
10977pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
10978    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
10979}
10980
10981/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10982///
10983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10984#[inline]
10985#[target_feature(enable = "avx512f,avx512vl")]
10986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10987#[cfg_attr(test, assert_instr(vcvtps2udq))]
10988pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10989    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
10990}
10991
10992/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10993///
10994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10995#[inline]
10996#[target_feature(enable = "avx512f,avx512vl")]
10997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10998#[cfg_attr(test, assert_instr(vcvtps2udq))]
10999pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
11000    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11001}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11011    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11022    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11033    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11034}
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
11039#[inline]
11040#[target_feature(enable = "avx512f")]
11041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11042#[cfg_attr(test, assert_instr(vcvtps2pd))]
11043pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11044    unsafe {
11045        transmute(vcvtps2pd(
11046            a.as_f32x8(),
11047            f64x8::ZERO,
11048            0b11111111,
11049            _MM_FROUND_CUR_DIRECTION,
11050        ))
11051    }
11052}
11053
11054/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11060#[cfg_attr(test, assert_instr(vcvtps2pd))]
11061pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11062    unsafe {
11063        transmute(vcvtps2pd(
11064            a.as_f32x8(),
11065            src.as_f64x8(),
11066            k,
11067            _MM_FROUND_CUR_DIRECTION,
11068        ))
11069    }
11070}
11071
11072/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11073///
11074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11075#[inline]
11076#[target_feature(enable = "avx512f")]
11077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11078#[cfg_attr(test, assert_instr(vcvtps2pd))]
11079pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11080    unsafe {
11081        transmute(vcvtps2pd(
11082            a.as_f32x8(),
11083            f64x8::ZERO,
11084            k,
11085            _MM_FROUND_CUR_DIRECTION,
11086        ))
11087    }
11088}
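
// Illustrative sketch (not part of the crate): widening f32 -> f64 is exact, so the
// rounding direction passed internally has no observable effect here; only the masking
// matters. Helper name is hypothetical; caller assumed to have `avx512f` enabled.
//
// #[target_feature(enable = "avx512f")]
// fn widen_low_lanes(src: __m512d, a: __m256) -> __m512d {
//     // keep the widened values in lanes 0..4, copy lanes 4..8 from `src`
//     _mm512_mask_cvtps_pd(src, 0b0000_1111, a)
// }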
11089
11090/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11093#[inline]
11094#[target_feature(enable = "avx512f")]
11095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11096#[cfg_attr(test, assert_instr(vcvtps2pd))]
11097pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11098    unsafe {
11099        transmute(vcvtps2pd(
11100            _mm512_castps512_ps256(v2).as_f32x8(),
11101            f64x8::ZERO,
11102            0b11111111,
11103            _MM_FROUND_CUR_DIRECTION,
11104        ))
11105    }
11106}
11107
11108/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11109///
11110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11111#[inline]
11112#[target_feature(enable = "avx512f")]
11113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11114#[cfg_attr(test, assert_instr(vcvtps2pd))]
11115pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11116    unsafe {
11117        transmute(vcvtps2pd(
11118            _mm512_castps512_ps256(v2).as_f32x8(),
11119            src.as_f64x8(),
11120            k,
11121            _MM_FROUND_CUR_DIRECTION,
11122        ))
11123    }
11124}
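
// Illustrative sketch (not part of the crate): `_mm512_cvtpslo_pd` is the same
// operation as dropping to the lower 256 bits first and then widening, so the two
// expressions below should produce identical vectors. Helper name is hypothetical;
// caller assumed to have `avx512f` enabled.
//
// #[target_feature(enable = "avx512f")]
// fn cvtpslo_pd_equivalence(v2: __m512) -> (__m512d, __m512d) {
//     (
//         _mm512_cvtpslo_pd(v2),
//         _mm512_cvtps_pd(_mm512_castps512_ps256(v2)),
//     )
// }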
11125
11126/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11127///
11128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
11129#[inline]
11130#[target_feature(enable = "avx512f")]
11131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11132#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11133pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11134    unsafe {
11135        transmute(vcvtpd2ps(
11136            a.as_f64x8(),
11137            f32x8::ZERO,
11138            0b11111111,
11139            _MM_FROUND_CUR_DIRECTION,
11140        ))
11141    }
11142}
11143
11144/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11147#[inline]
11148#[target_feature(enable = "avx512f")]
11149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11150#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11151pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11152    unsafe {
11153        transmute(vcvtpd2ps(
11154            a.as_f64x8(),
11155            src.as_f32x8(),
11156            k,
11157            _MM_FROUND_CUR_DIRECTION,
11158        ))
11159    }
11160}
11161
11162/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11163///
11164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11165#[inline]
11166#[target_feature(enable = "avx512f")]
11167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11168#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11169pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11170    unsafe {
11171        transmute(vcvtpd2ps(
11172            a.as_f64x8(),
11173            f32x8::ZERO,
11174            k,
11175            _MM_FROUND_CUR_DIRECTION,
11176        ))
11177    }
11178}
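
// Illustrative sketch (not part of the crate): narrowing f64 -> f32 rounds according to
// the current MXCSR rounding mode (`_MM_FROUND_CUR_DIRECTION`), and masked-off lanes of
// the 256-bit result are zeroed by the maskz form. Helper name is hypothetical; caller
// assumed to have `avx512f` enabled.
//
// #[target_feature(enable = "avx512f")]
// fn narrow_even_lanes(a: __m512d) -> __m256 {
//     // keep only the even lanes of the narrowed result
//     _mm512_maskz_cvtpd_ps(0b0101_0101, a)
// }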
11179
11180/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11181///
11182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11183#[inline]
11184#[target_feature(enable = "avx512f,avx512vl")]
11185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11186#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11187pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11188    unsafe {
11189        let convert = _mm256_cvtpd_ps(a);
11190        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11191    }
11192}
11193
11194/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11197#[inline]
11198#[target_feature(enable = "avx512f,avx512vl")]
11199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11200#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11201pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11202    unsafe {
11203        let convert = _mm256_cvtpd_ps(a);
11204        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11205    }
11206}
11207
11208/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11209///
11210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11211#[inline]
11212#[target_feature(enable = "avx512f,avx512vl")]
11213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11214#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11215pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11216    unsafe {
11217        let convert = _mm_cvtpd_ps(a);
11218        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11219    }
11220}
11221
11222/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11223///
11224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11225#[inline]
11226#[target_feature(enable = "avx512f,avx512vl")]
11227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11228#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11229pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11230    unsafe {
11231        let convert = _mm_cvtpd_ps(a);
11232        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11233    }
11234}
11235
11236/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11237///
11238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
11239#[inline]
11240#[target_feature(enable = "avx512f")]
11241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11242#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11243pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11244    unsafe {
11245        transmute(vcvtpd2dq(
11246            a.as_f64x8(),
11247            i32x8::ZERO,
11248            0b11111111,
11249            _MM_FROUND_CUR_DIRECTION,
11250        ))
11251    }
11252}
11253
11254/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11255///
11256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11257#[inline]
11258#[target_feature(enable = "avx512f")]
11259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11260#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11261pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11262    unsafe {
11263        transmute(vcvtpd2dq(
11264            a.as_f64x8(),
11265            src.as_i32x8(),
11266            k,
11267            _MM_FROUND_CUR_DIRECTION,
11268        ))
11269    }
11270}
11271
11272/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11273///
11274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11275#[inline]
11276#[target_feature(enable = "avx512f")]
11277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11278#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11279pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11280    unsafe {
11281        transmute(vcvtpd2dq(
11282            a.as_f64x8(),
11283            i32x8::ZERO,
11284            k,
11285            _MM_FROUND_CUR_DIRECTION,
11286        ))
11287    }
11288}
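
// Illustrative sketch (not part of the crate): f64 -> i32 conversion also uses the
// current rounding mode, so 2.5 is expected to become 2 under the default
// round-to-nearest-even setting. Helper name is hypothetical; caller assumed to have
// `avx512f` enabled.
//
// #[target_feature(enable = "avx512f")]
// fn cvtpd_epi32_demo() -> __m256i {
//     let a = _mm512_set1_pd(2.5);
//     _mm512_cvtpd_epi32(a) // all eight lanes expected to be 2 with default MXCSR
// }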
11289
11290/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11291///
11292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11293#[inline]
11294#[target_feature(enable = "avx512f,avx512vl")]
11295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11296#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11297pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11298    unsafe {
11299        let convert = _mm256_cvtpd_epi32(a);
11300        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11301    }
11302}
11303
11304/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11305///
11306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11307#[inline]
11308#[target_feature(enable = "avx512f,avx512vl")]
11309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11310#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11311pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11312    unsafe {
11313        let convert = _mm256_cvtpd_epi32(a);
11314        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11315    }
11316}
11317
11318/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11319///
11320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11321#[inline]
11322#[target_feature(enable = "avx512f,avx512vl")]
11323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11324#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11325pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11326    unsafe {
11327        let convert = _mm_cvtpd_epi32(a);
11328        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11329    }
11330}
11331
11332/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11333///
11334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11335#[inline]
11336#[target_feature(enable = "avx512f,avx512vl")]
11337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11338#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11339pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11340    unsafe {
11341        let convert = _mm_cvtpd_epi32(a);
11342        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11343    }
11344}
11345
11346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11347///
11348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
11349#[inline]
11350#[target_feature(enable = "avx512f")]
11351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11352#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11353pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11354    unsafe {
11355        transmute(vcvtpd2udq(
11356            a.as_f64x8(),
11357            u32x8::ZERO,
11358            0b11111111,
11359            _MM_FROUND_CUR_DIRECTION,
11360        ))
11361    }
11362}
11363
11364/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11367#[inline]
11368#[target_feature(enable = "avx512f")]
11369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11370#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11371pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11372    unsafe {
11373        transmute(vcvtpd2udq(
11374            a.as_f64x8(),
11375            src.as_u32x8(),
11376            k,
11377            _MM_FROUND_CUR_DIRECTION,
11378        ))
11379    }
11380}
11381
11382/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11383///
11384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11385#[inline]
11386#[target_feature(enable = "avx512f")]
11387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11388#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11389pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11390    unsafe {
11391        transmute(vcvtpd2udq(
11392            a.as_f64x8(),
11393            u32x8::ZERO,
11394            k,
11395            _MM_FROUND_CUR_DIRECTION,
11396        ))
11397    }
11398}
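
// Illustrative sketch (not part of the crate): the unsigned f64 -> u32 conversion lets
// values above i32::MAX be represented directly; masking works the same way as in the
// signed variants. Helper name is hypothetical; caller assumed to have `avx512f`
// enabled.
//
// #[target_feature(enable = "avx512f")]
// fn cvtpd_epu32_demo(src: __m256i) -> __m256i {
//     let a = _mm512_set1_pd(3_000_000_000.0); // representable as u32, not as i32
//     _mm512_mask_cvtpd_epu32(src, 0b1111_0000, a) // low four lanes copied from `src`
// }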
11399
11400/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11401///
11402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11403#[inline]
11404#[target_feature(enable = "avx512f,avx512vl")]
11405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11406#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11407pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11408    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
11409}
11410
11411/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11412///
11413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11414#[inline]
11415#[target_feature(enable = "avx512f,avx512vl")]
11416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11417#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11418pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11419    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
11420}
11421
11422/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11423///
11424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11425#[inline]
11426#[target_feature(enable = "avx512f,avx512vl")]
11427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11428#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11429pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11430    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
11431}
11432
11433/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11434///
11435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11436#[inline]
11437#[target_feature(enable = "avx512f,avx512vl")]
11438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11439#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11440pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11441    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
11442}
11443
11444/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11445///
11446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11447#[inline]
11448#[target_feature(enable = "avx512f,avx512vl")]
11449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11450#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11451pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11452    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
11453}
11454
11455/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11458#[inline]
11459#[target_feature(enable = "avx512f,avx512vl")]
11460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11461#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11462pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11463    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
11464}
11465
11466/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11467///
11468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
11469#[inline]
11470#[target_feature(enable = "avx512f")]
11471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11472#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11473pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11474    unsafe {
11475        let r: f32x8 = vcvtpd2ps(
11476            v2.as_f64x8(),
11477            f32x8::ZERO,
11478            0b11111111,
11479            _MM_FROUND_CUR_DIRECTION,
11480        );
11481        simd_shuffle!(
11482            r,
11483            f32x8::ZERO,
11484            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11485        )
11486    }
11487}
11488
11489/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11497    unsafe {
11498        let r: f32x8 = vcvtpd2ps(
11499            v2.as_f64x8(),
11500            _mm512_castps512_ps256(src).as_f32x8(),
11501            k,
11502            _MM_FROUND_CUR_DIRECTION,
11503        );
11504        simd_shuffle!(
11505            r,
11506            f32x8::ZERO,
11507            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11508        )
11509    }
11510}
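
// Illustrative sketch (not part of the crate): `_mm512_cvtpd_pslo` packs the eight
// narrowed f32 values into the lower half of a 512-bit vector and zeroes the upper
// half, so it should agree with zero-extending the `_mm512_cvtpd_ps` result back up to
// 512 bits. Helper name is hypothetical; caller assumed to have `avx512f` enabled.
//
// #[target_feature(enable = "avx512f")]
// fn cvtpd_pslo_equivalence(v2: __m512d) -> (__m512, __m512) {
//     (
//         _mm512_cvtpd_pslo(v2),
//         _mm512_zextps256_ps512(_mm512_cvtpd_ps(v2)),
//     )
// }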
11511
11512/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11518#[cfg_attr(test, assert_instr(vpmovsxbd))]
11519pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11520    unsafe {
11521        let a = a.as_i8x16();
11522        transmute::<i32x16, _>(simd_cast(a))
11523    }
11524}
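
// Illustrative sketch (not part of the crate): sign extension preserves the value of
// each byte, so a lane holding -5i8 becomes -5i32 after widening. Helper name is
// hypothetical; caller assumed to have `avx512f` enabled.
//
// #[target_feature(enable = "avx512f")]
// fn sign_extend_bytes_demo() -> __m512i {
//     let a = _mm_set1_epi8(-5);
//     _mm512_cvtepi8_epi32(a) // all sixteen i32 lanes expected to be -5
// }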
11525
11526/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11527///
11528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11529#[inline]
11530#[target_feature(enable = "avx512f")]
11531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11532#[cfg_attr(test, assert_instr(vpmovsxbd))]
11533pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11534    unsafe {
11535        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11536        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11537    }
11538}
11539
11540/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11543#[inline]
11544#[target_feature(enable = "avx512f")]
11545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11546#[cfg_attr(test, assert_instr(vpmovsxbd))]
11547pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11548    unsafe {
11549        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11550        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11551    }
11552}
11553
11554/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11555///
11556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11557#[inline]
11558#[target_feature(enable = "avx512f,avx512vl")]
11559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11560#[cfg_attr(test, assert_instr(vpmovsxbd))]
11561pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11562    unsafe {
11563        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11564        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11565    }
11566}
11567
11568/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11574#[cfg_attr(test, assert_instr(vpmovsxbd))]
11575pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11576    unsafe {
11577        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11578        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11579    }
11580}
11581
11582/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vpmovsxbd))]
11589pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11590    unsafe {
11591        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11592        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11593    }
11594}
11595
11596/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11602#[cfg_attr(test, assert_instr(vpmovsxbd))]
11603pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11604    unsafe {
11605        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11606        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11607    }
11608}
11609
11610/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11611///
11612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
11613#[inline]
11614#[target_feature(enable = "avx512f")]
11615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11616#[cfg_attr(test, assert_instr(vpmovsxbq))]
11617pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11618    unsafe {
11619        let a = a.as_i8x16();
11620        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11621        transmute::<i64x8, _>(simd_cast(v64))
11622    }
11623}
11624
11625/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11626///
11627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11628#[inline]
11629#[target_feature(enable = "avx512f")]
11630#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11631#[cfg_attr(test, assert_instr(vpmovsxbq))]
11632pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11633    unsafe {
11634        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11635        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11636    }
11637}
11638
11639/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11645#[cfg_attr(test, assert_instr(vpmovsxbq))]
11646pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11647    unsafe {
11648        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11649        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11650    }
11651}
11652
11653/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11654///
11655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11656#[inline]
11657#[target_feature(enable = "avx512f,avx512vl")]
11658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11659#[cfg_attr(test, assert_instr(vpmovsxbq))]
11660pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11661    unsafe {
11662        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11663        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11664    }
11665}
11666
11667/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11668///
11669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11670#[inline]
11671#[target_feature(enable = "avx512f,avx512vl")]
11672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11673#[cfg_attr(test, assert_instr(vpmovsxbq))]
11674pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11675    unsafe {
11676        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11677        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11678    }
11679}
11680
11681/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11687#[cfg_attr(test, assert_instr(vpmovsxbq))]
11688pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689    unsafe {
11690        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11691        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11692    }
11693}
11694
11695/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11696///
11697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11698#[inline]
11699#[target_feature(enable = "avx512f,avx512vl")]
11700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11701#[cfg_attr(test, assert_instr(vpmovsxbq))]
11702pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11703    unsafe {
11704        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11705        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11706    }
11707}
11708
11709/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11710///
11711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
11712#[inline]
11713#[target_feature(enable = "avx512f")]
11714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11715#[cfg_attr(test, assert_instr(vpmovzxbd))]
11716pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
11717    unsafe {
11718        let a = a.as_u8x16();
11719        transmute::<i32x16, _>(simd_cast(a))
11720    }
11721}
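
// Illustrative sketch (not part of the crate): the unsigned widening treats each byte
// as 0..=255, so the same bit pattern that sign-extends to -128 zero-extends to 128.
// Helper name is hypothetical; caller assumed to have `avx512f` enabled.
//
// #[target_feature(enable = "avx512f")]
// fn signed_vs_unsigned_widening() -> (__m512i, __m512i) {
//     let a = _mm_set1_epi8(-128); // bit pattern 0x80 in every byte
//     (
//         _mm512_cvtepi8_epi32(a), // lanes hold -128
//         _mm512_cvtepu8_epi32(a), // lanes hold 128
//     )
// }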
11722
11723/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11724///
11725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
11726#[inline]
11727#[target_feature(enable = "avx512f")]
11728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11729#[cfg_attr(test, assert_instr(vpmovzxbd))]
11730pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11731    unsafe {
11732        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11733        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11734    }
11735}
11736
11737/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11738///
11739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
11740#[inline]
11741#[target_feature(enable = "avx512f")]
11742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11743#[cfg_attr(test, assert_instr(vpmovzxbd))]
11744pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11745    unsafe {
11746        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11747        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11748    }
11749}
11750
11751/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11752///
11753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
11754#[inline]
11755#[target_feature(enable = "avx512f,avx512vl")]
11756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11757#[cfg_attr(test, assert_instr(vpmovzxbd))]
11758pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11759    unsafe {
11760        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11761        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11762    }
11763}
11764
11765/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11766///
11767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
11768#[inline]
11769#[target_feature(enable = "avx512f,avx512vl")]
11770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11771#[cfg_attr(test, assert_instr(vpmovzxbd))]
11772pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11773    unsafe {
11774        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11775        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11776    }
11777}
11778
11779/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
11782#[inline]
11783#[target_feature(enable = "avx512f,avx512vl")]
11784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11785#[cfg_attr(test, assert_instr(vpmovzxbd))]
11786pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11787    unsafe {
11788        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11789        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11790    }
11791}
11792
11793/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11794///
11795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
11796#[inline]
11797#[target_feature(enable = "avx512f,avx512vl")]
11798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11799#[cfg_attr(test, assert_instr(vpmovzxbd))]
11800pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11801    unsafe {
11802        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11803        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11804    }
11805}
11806
11807/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11808///
11809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
11810#[inline]
11811#[target_feature(enable = "avx512f")]
11812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11813#[cfg_attr(test, assert_instr(vpmovzxbq))]
11814pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
11815    unsafe {
11816        let a = a.as_u8x16();
11817        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11818        transmute::<i64x8, _>(simd_cast(v64))
11819    }
11820}
11821
11822/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11823///
11824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
11825#[inline]
11826#[target_feature(enable = "avx512f")]
11827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11828#[cfg_attr(test, assert_instr(vpmovzxbq))]
11829pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11830    unsafe {
11831        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11832        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11833    }
11834}
11835
11836/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11837///
11838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
11839#[inline]
11840#[target_feature(enable = "avx512f")]
11841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11842#[cfg_attr(test, assert_instr(vpmovzxbq))]
11843pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11844    unsafe {
11845        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11846        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11847    }
11848}
11849
11850/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11856#[cfg_attr(test, assert_instr(vpmovzxbq))]
11857pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11858    unsafe {
11859        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11860        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11861    }
11862}
11863
11864/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11865///
11866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11867#[inline]
11868#[target_feature(enable = "avx512f,avx512vl")]
11869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11870#[cfg_attr(test, assert_instr(vpmovzxbq))]
11871pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11872    unsafe {
11873        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11874        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11875    }
11876}
11877
11878/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11879///
11880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11881#[inline]
11882#[target_feature(enable = "avx512f,avx512vl")]
11883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11884#[cfg_attr(test, assert_instr(vpmovzxbq))]
11885pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11886    unsafe {
11887        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11888        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11889    }
11890}
11891
11892/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11895#[inline]
11896#[target_feature(enable = "avx512f,avx512vl")]
11897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11898#[cfg_attr(test, assert_instr(vpmovzxbq))]
11899pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11900    unsafe {
11901        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11902        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11903    }
11904}
11905
11906/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914    unsafe {
11915        let a = a.as_i16x16();
11916        transmute::<i32x16, _>(simd_cast(a))
11917    }
11918}
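
// Illustrative sketch (not part of the crate): the 16-bit to 32-bit widening consumes a
// full 256-bit input, one word per output lane; the masked form below keeps `src` in
// every odd lane. Helper name is hypothetical; caller assumed to have `avx512f`
// enabled.
//
// #[target_feature(enable = "avx512f")]
// fn widen_words_even_lanes(src: __m512i, a: __m256i) -> __m512i {
//     _mm512_mask_cvtepi16_epi32(src, 0b0101_0101_0101_0101, a)
// }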
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928    unsafe {
11929        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11930        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11931    }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942    unsafe {
11943        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11944        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11945    }
11946}
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956    unsafe {
11957        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11958        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11959    }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970    unsafe {
11971        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11972        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11973    }
11974}
11975
11976/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984    unsafe {
11985        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
11986        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11987    }
11988}
11989
11990/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998    unsafe {
11999        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12000        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12001    }
12002}
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012    unsafe {
12013        let a = a.as_i16x8();
12014        transmute::<i64x8, _>(simd_cast(a))
12015    }
12016}
12017
12018/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12021#[inline]
12022#[target_feature(enable = "avx512f")]
12023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12024#[cfg_attr(test, assert_instr(vpmovsxwq))]
12025pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12026    unsafe {
12027        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12028        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12029    }
12030}
12031
12032/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12033///
12034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12035#[inline]
12036#[target_feature(enable = "avx512f")]
12037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12038#[cfg_attr(test, assert_instr(vpmovsxwq))]
12039pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12040    unsafe {
12041        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12042        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12043    }
12044}
12045
12046/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12047///
12048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12049#[inline]
12050#[target_feature(enable = "avx512f,avx512vl")]
12051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12052#[cfg_attr(test, assert_instr(vpmovsxwq))]
12053pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12054    unsafe {
12055        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12056        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12057    }
12058}
12059
12060/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12061///
12062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12063#[inline]
12064#[target_feature(enable = "avx512f,avx512vl")]
12065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12066#[cfg_attr(test, assert_instr(vpmovsxwq))]
12067pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12068    unsafe {
12069        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12070        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12071    }
12072}
12073
12074/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12075///
12076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12077#[inline]
12078#[target_feature(enable = "avx512f,avx512vl")]
12079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12080#[cfg_attr(test, assert_instr(vpmovsxwq))]
12081pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082    unsafe {
12083        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12084        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12085    }
12086}
12087
12088/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12094#[cfg_attr(test, assert_instr(vpmovsxwq))]
12095pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12096    unsafe {
12097        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12098        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12099    }
12100}
12101
12102/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12103///
12104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
12105#[inline]
12106#[target_feature(enable = "avx512f")]
12107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12108#[cfg_attr(test, assert_instr(vpmovzxwd))]
12109pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12110    unsafe {
12111        let a = a.as_u16x16();
12112        transmute::<i32x16, _>(simd_cast(a))
12113    }
12114}
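
// Illustrative usage sketch, not part of the original source (std assumed): the
// zero extension treats the bit pattern 0xFFFF as 65535 rather than -1, in
// contrast to the sign-extending `_mm512_cvtepi16_epi32`.
#[cfg(target_arch = "x86_64")]
fn example_cvtepu16_epi32() {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            use std::arch::x86_64::*;
            // Every 16-bit lane holds the bit pattern 0xFFFF.
            let a = _mm256_set1_epi16(-1);
            let wide = _mm512_cvtepu16_epi32(a);
            let lanes: [i32; 16] = std::mem::transmute(wide);
            assert!(lanes.iter().all(|&x| x == 65535));
        }
    }
}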
12115
12116/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12117///
12118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12119#[inline]
12120#[target_feature(enable = "avx512f")]
12121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12122#[cfg_attr(test, assert_instr(vpmovzxwd))]
12123pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12124    unsafe {
12125        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12126        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12127    }
12128}
12129
12130/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12131///
12132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12133#[inline]
12134#[target_feature(enable = "avx512f")]
12135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12136#[cfg_attr(test, assert_instr(vpmovzxwd))]
12137pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12138    unsafe {
12139        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12140        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12141    }
12142}
12143
12144/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12145///
12146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12147#[inline]
12148#[target_feature(enable = "avx512f,avx512vl")]
12149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12150#[cfg_attr(test, assert_instr(vpmovzxwd))]
12151pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12152    unsafe {
12153        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12154        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12155    }
12156}
12157
12158/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12159///
12160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12161#[inline]
12162#[target_feature(enable = "avx512f,avx512vl")]
12163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12164#[cfg_attr(test, assert_instr(vpmovzxwd))]
12165pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12166    unsafe {
12167        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12168        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12169    }
12170}
12171
12172/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12173///
12174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12175#[inline]
12176#[target_feature(enable = "avx512f,avx512vl")]
12177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12178#[cfg_attr(test, assert_instr(vpmovzxwd))]
12179pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12180    unsafe {
12181        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12182        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12183    }
12184}
12185
12186/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12187///
12188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12189#[inline]
12190#[target_feature(enable = "avx512f,avx512vl")]
12191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12192#[cfg_attr(test, assert_instr(vpmovzxwd))]
12193pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12194    unsafe {
12195        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12196        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12197    }
12198}
12199
12200/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12201///
12202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12203#[inline]
12204#[target_feature(enable = "avx512f")]
12205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12206#[cfg_attr(test, assert_instr(vpmovzxwq))]
12207pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12208    unsafe {
12209        let a = a.as_u16x8();
12210        transmute::<i64x8, _>(simd_cast(a))
12211    }
12212}
12213
12214/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12215///
12216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12217#[inline]
12218#[target_feature(enable = "avx512f")]
12219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12220#[cfg_attr(test, assert_instr(vpmovzxwq))]
12221pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12222    unsafe {
12223        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12224        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12225    }
12226}
12227
12228/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12229///
12230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12231#[inline]
12232#[target_feature(enable = "avx512f")]
12233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12234#[cfg_attr(test, assert_instr(vpmovzxwq))]
12235pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12236    unsafe {
12237        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12238        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12239    }
12240}
12241
12242/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12243///
12244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12245#[inline]
12246#[target_feature(enable = "avx512f,avx512vl")]
12247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12248#[cfg_attr(test, assert_instr(vpmovzxwq))]
12249pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12250    unsafe {
12251        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12252        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12253    }
12254}
12255
12256/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12257///
12258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12259#[inline]
12260#[target_feature(enable = "avx512f,avx512vl")]
12261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12262#[cfg_attr(test, assert_instr(vpmovzxwq))]
12263pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12264    unsafe {
12265        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12266        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12267    }
12268}
12269
12270/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12271///
12272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12273#[inline]
12274#[target_feature(enable = "avx512f,avx512vl")]
12275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12276#[cfg_attr(test, assert_instr(vpmovzxwq))]
12277pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12278    unsafe {
12279        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12280        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12281    }
12282}
12283
12284/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12290#[cfg_attr(test, assert_instr(vpmovzxwq))]
12291pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12292    unsafe {
12293        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12294        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12295    }
12296}
12297
12298/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12299///
12300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
12301#[inline]
12302#[target_feature(enable = "avx512f")]
12303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12304#[cfg_attr(test, assert_instr(vpmovsxdq))]
12305pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12306    unsafe {
12307        let a = a.as_i32x8();
12308        transmute::<i64x8, _>(simd_cast(a))
12309    }
12310}
12311
12312/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12313///
12314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12315#[inline]
12316#[target_feature(enable = "avx512f")]
12317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12318#[cfg_attr(test, assert_instr(vpmovsxdq))]
12319pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12320    unsafe {
12321        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12322        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12323    }
12324}
12325
12326/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12327///
12328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12329#[inline]
12330#[target_feature(enable = "avx512f")]
12331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12332#[cfg_attr(test, assert_instr(vpmovsxdq))]
12333pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12334    unsafe {
12335        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12336        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12337    }
12338}
12339
12340/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12341///
12342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12343#[inline]
12344#[target_feature(enable = "avx512f,avx512vl")]
12345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12346#[cfg_attr(test, assert_instr(vpmovsxdq))]
12347pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12348    unsafe {
12349        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12350        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12351    }
12352}
12353
12354/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12355///
12356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12357#[inline]
12358#[target_feature(enable = "avx512f,avx512vl")]
12359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12360#[cfg_attr(test, assert_instr(vpmovsxdq))]
12361pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12362    unsafe {
12363        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12364        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12365    }
12366}
12367
12368/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12369///
12370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12371#[inline]
12372#[target_feature(enable = "avx512f,avx512vl")]
12373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12374#[cfg_attr(test, assert_instr(vpmovsxdq))]
12375pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12376    unsafe {
12377        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12378        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12379    }
12380}
12381
12382/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12383///
12384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12385#[inline]
12386#[target_feature(enable = "avx512f,avx512vl")]
12387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12388#[cfg_attr(test, assert_instr(vpmovsxdq))]
12389pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12390    unsafe {
12391        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12392        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12393    }
12394}
12395
12396/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12397///
12398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
12399#[inline]
12400#[target_feature(enable = "avx512f")]
12401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12402#[cfg_attr(test, assert_instr(vpmovzxdq))]
12403pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12404    unsafe {
12405        let a = a.as_u32x8();
12406        transmute::<i64x8, _>(simd_cast(a))
12407    }
12408}
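
// Illustrative usage sketch (std assumed, hypothetical function name): zero
// extension of 32-bit lanes, so the all-ones pattern becomes u32::MAX as an
// i64 rather than -1.
#[cfg(target_arch = "x86_64")]
fn example_cvtepu32_epi64() {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm256_set1_epi32(-1); // 0xFFFF_FFFF in every lane
            let wide = _mm512_cvtepu32_epi64(a);
            let lanes: [i64; 8] = std::mem::transmute(wide);
            assert!(lanes.iter().all(|&x| x == 4_294_967_295));
        }
    }
}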
12409
12410/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12411///
12412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12413#[inline]
12414#[target_feature(enable = "avx512f")]
12415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12416#[cfg_attr(test, assert_instr(vpmovzxdq))]
12417pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12418    unsafe {
12419        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12420        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12421    }
12422}
12423
12424/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12425///
12426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12427#[inline]
12428#[target_feature(enable = "avx512f")]
12429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12430#[cfg_attr(test, assert_instr(vpmovzxdq))]
12431pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12432    unsafe {
12433        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12434        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12435    }
12436}
12437
12438/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12439///
12440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12441#[inline]
12442#[target_feature(enable = "avx512f,avx512vl")]
12443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12444#[cfg_attr(test, assert_instr(vpmovzxdq))]
12445pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12446    unsafe {
12447        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12448        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12449    }
12450}
12451
12452/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12453///
12454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12455#[inline]
12456#[target_feature(enable = "avx512f,avx512vl")]
12457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12458#[cfg_attr(test, assert_instr(vpmovzxdq))]
12459pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12460    unsafe {
12461        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12462        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12463    }
12464}
12465
12466/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12467///
12468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12469#[inline]
12470#[target_feature(enable = "avx512f,avx512vl")]
12471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12472#[cfg_attr(test, assert_instr(vpmovzxdq))]
12473pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12474    unsafe {
12475        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12476        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12477    }
12478}
12479
12480/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12481///
12482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12483#[inline]
12484#[target_feature(enable = "avx512f,avx512vl")]
12485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12486#[cfg_attr(test, assert_instr(vpmovzxdq))]
12487pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12488    unsafe {
12489        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12490        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12491    }
12492}
12493
12494/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
12497#[inline]
12498#[target_feature(enable = "avx512f")]
12499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12500#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12501pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12502    unsafe {
12503        let a = a.as_i32x16();
12504        transmute::<f32x16, _>(simd_cast(a))
12505    }
12506}
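
// Illustrative usage sketch, not part of the original source: small 32-bit
// integers convert to f32 exactly; values with magnitude above 2^24 may be
// rounded to the nearest representable f32. Assumes a user crate with std.
#[cfg(target_arch = "x86_64")]
fn example_cvtepi32_ps() {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm512_set1_epi32(-7);
            let f = _mm512_cvtepi32_ps(a);
            let lanes: [f32; 16] = std::mem::transmute(f);
            assert!(lanes.iter().all(|&x| x == -7.0));
        }
    }
}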
12507
12508/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12509///
12510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12511#[inline]
12512#[target_feature(enable = "avx512f")]
12513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12514#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12515pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12516    unsafe {
12517        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12518        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12519    }
12520}
12521
12522/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12523///
12524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12525#[inline]
12526#[target_feature(enable = "avx512f")]
12527#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12528#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12529pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12530    unsafe {
12531        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12532        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12533    }
12534}
12535
12536/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12537///
12538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12539#[inline]
12540#[target_feature(enable = "avx512f,avx512vl")]
12541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12542#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12543pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12544    unsafe {
12545        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12546        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
12547    }
12548}
12549
12550/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12551///
12552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12553#[inline]
12554#[target_feature(enable = "avx512f,avx512vl")]
12555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12556#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12557pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12558    unsafe {
12559        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12560        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
12561    }
12562}
12563
12564/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12565///
12566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12567#[inline]
12568#[target_feature(enable = "avx512f,avx512vl")]
12569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12570#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12571pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12572    unsafe {
12573        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12574        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
12575    }
12576}
12577
12578/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12579///
12580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12581#[inline]
12582#[target_feature(enable = "avx512f,avx512vl")]
12583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12584#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12585pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12586    unsafe {
12587        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12588        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
12589    }
12590}
12591
12592/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12593///
12594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
12595#[inline]
12596#[target_feature(enable = "avx512f")]
12597#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12598#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12599pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12600    unsafe {
12601        let a = a.as_i32x8();
12602        transmute::<f64x8, _>(simd_cast(a))
12603    }
12604}
12605
12606/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12607///
12608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12609#[inline]
12610#[target_feature(enable = "avx512f")]
12611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12612#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12613pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12614    unsafe {
12615        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12616        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12617    }
12618}
12619
12620/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12621///
12622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12623#[inline]
12624#[target_feature(enable = "avx512f")]
12625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12626#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12627pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12628    unsafe {
12629        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12630        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12631    }
12632}
12633
12634/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12635///
12636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12637#[inline]
12638#[target_feature(enable = "avx512f,avx512vl")]
12639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12640#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12641pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12642    unsafe {
12643        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12644        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12645    }
12646}
12647
12648/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12649///
12650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
12651#[inline]
12652#[target_feature(enable = "avx512f,avx512vl")]
12653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12654#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12655pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
12656    unsafe {
12657        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12658        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12659    }
12660}
12661
12662/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12663///
12664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
12665#[inline]
12666#[target_feature(enable = "avx512f,avx512vl")]
12667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12668#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12669pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12670    unsafe {
12671        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12672        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12673    }
12674}
12675
12676/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12677///
12678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
12679#[inline]
12680#[target_feature(enable = "avx512f,avx512vl")]
12681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12682#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12683pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
12684    unsafe {
12685        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12686        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12687    }
12688}
12689
12690/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12691///
12692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
12693#[inline]
12694#[target_feature(enable = "avx512f")]
12695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12696#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12697pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
12698    unsafe {
12699        let a = a.as_u32x16();
12700        transmute::<f32x16, _>(simd_cast(a))
12701    }
12702}
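
// Illustrative usage sketch (std assumed, default MXCSR rounding mode assumed):
// the same bit pattern converts very differently depending on whether the
// unsigned or the signed intrinsic is used.
#[cfg(target_arch = "x86_64")]
fn example_cvtepu32_ps() {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm512_set1_epi32(-1); // bit pattern 0xFFFF_FFFF
            let unsigned: [f32; 16] = std::mem::transmute(_mm512_cvtepu32_ps(a));
            let signed: [f32; 16] = std::mem::transmute(_mm512_cvtepi32_ps(a));
            // u32::MAX is not exactly representable as f32 and rounds to 2^32.
            assert_eq!(unsigned[0], u32::MAX as f32);
            assert_eq!(signed[0], -1.0);
        }
    }
}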
12703
12704/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
12707#[inline]
12708#[target_feature(enable = "avx512f")]
12709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12710#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12711pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12712    unsafe {
12713        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12714        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12715    }
12716}
12717
12718/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12719///
12720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
12721#[inline]
12722#[target_feature(enable = "avx512f")]
12723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12724#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12725pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
12726    unsafe {
12727        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12728        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12729    }
12730}
12731
12732/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12733///
12734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
12735#[inline]
12736#[target_feature(enable = "avx512f")]
12737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12738#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12739pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
12740    unsafe {
12741        let a = a.as_u32x8();
12742        transmute::<f64x8, _>(simd_cast(a))
12743    }
12744}
12745
12746/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12747///
12748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
12749#[inline]
12750#[target_feature(enable = "avx512f")]
12751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12752#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12753pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12754    unsafe {
12755        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12756        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12757    }
12758}
12759
12760/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12761///
12762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
12763#[inline]
12764#[target_feature(enable = "avx512f")]
12765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12766#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12767pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
12768    unsafe {
12769        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12770        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12771    }
12772}
12773
12774/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12775///
12776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
12777#[inline]
12778#[target_feature(enable = "avx512f,avx512vl")]
12779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12780#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12781pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
12782    unsafe {
12783        let a = a.as_u32x4();
12784        transmute::<f64x4, _>(simd_cast(a))
12785    }
12786}
12787
12788/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12789///
12790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
12791#[inline]
12792#[target_feature(enable = "avx512f,avx512vl")]
12793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12794#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12795pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12796    unsafe {
12797        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12798        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12799    }
12800}
12801
12802/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12803///
12804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
12805#[inline]
12806#[target_feature(enable = "avx512f,avx512vl")]
12807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12808#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12809pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
12810    unsafe {
12811        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12812        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12813    }
12814}
12815
12816/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12817///
12818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
12819#[inline]
12820#[target_feature(enable = "avx512f,avx512vl")]
12821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12822#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12823pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
12824    unsafe {
12825        let a = a.as_u32x4();
12826        let lo: u32x2 = simd_shuffle!(a, a, [0, 1]); // keep only the two low 32-bit lanes
12827        transmute::<f64x2, _>(simd_cast(lo))
12828    }
12829}
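
// Illustrative usage sketch (std assumed): only the two low 32-bit lanes of `a`
// participate, and the all-ones pattern is read as an unsigned value.
#[cfg(target_arch = "x86_64")]
fn example_cvtepu32_pd() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm_setr_epi32(-1, 7, 100, 200); // lanes 2 and 3 are ignored
            let d = _mm_cvtepu32_pd(a);
            let lanes: [f64; 2] = std::mem::transmute(d);
            assert_eq!(lanes, [4_294_967_295.0, 7.0]);
        }
    }
}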
12830
12831/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12832///
12833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
12834#[inline]
12835#[target_feature(enable = "avx512f,avx512vl")]
12836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12837#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12838pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12839    unsafe {
12840        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12841        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12842    }
12843}
12844
12845/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12846///
12847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
12848#[inline]
12849#[target_feature(enable = "avx512f,avx512vl")]
12850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12851#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12852pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
12853    unsafe {
12854        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12855        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12856    }
12857}
12858
12859/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12860///
12861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
12862#[inline]
12863#[target_feature(enable = "avx512f")]
12864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12865#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12866pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
12867    unsafe {
12868        let v2 = v2.as_i32x16();
12869        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12870        transmute::<f64x8, _>(simd_cast(v256))
12871    }
12872}
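
// Illustrative usage sketch (std assumed, hypothetical function name): only the
// lower eight 32-bit lanes of `v2` are converted; the upper eight are ignored.
#[cfg(target_arch = "x86_64")]
fn example_cvtepi32lo_pd() {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            use std::arch::x86_64::*;
            let v2 = _mm512_setr_epi32(
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            );
            let d = _mm512_cvtepi32lo_pd(v2);
            let lanes: [f64; 8] = std::mem::transmute(d);
            assert_eq!(lanes, [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
        }
    }
}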
12873
12874/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12875///
12876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
12877#[inline]
12878#[target_feature(enable = "avx512f")]
12879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12880#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12881pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12882    unsafe {
12883        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
12884        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12885    }
12886}
12887
12888/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12889///
12890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
12891#[inline]
12892#[target_feature(enable = "avx512f")]
12893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12894#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12895pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
12896    unsafe {
12897        let v2 = v2.as_u32x16();
12898        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12899        transmute::<f64x8, _>(simd_cast(v256))
12900    }
12901}
12902
12903/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12904///
12905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
12906#[inline]
12907#[target_feature(enable = "avx512f")]
12908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12909#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12910pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12911    unsafe {
12912        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
12913        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12914    }
12915}
12916
12917/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12918///
12919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
12920#[inline]
12921#[target_feature(enable = "avx512f")]
12922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12923#[cfg_attr(test, assert_instr(vpmovdw))]
12924pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
12925    unsafe {
12926        let a = a.as_i32x16();
12927        transmute::<i16x16, _>(simd_cast(a))
12928    }
12929}
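
// Illustrative usage sketch, not part of the original source: the narrowing
// conversion truncates, it does not saturate, so only the low 16 bits of each
// lane survive. Assumes a user crate with std.
#[cfg(target_arch = "x86_64")]
fn example_cvtepi32_epi16() {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm512_set1_epi32(0x0001_2345);
            let narrow = _mm512_cvtepi32_epi16(a);
            let lanes: [i16; 16] = std::mem::transmute(narrow);
            assert!(lanes.iter().all(|&x| x == 0x2345));
        }
    }
}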
12930
12931/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12932///
12933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
12934#[inline]
12935#[target_feature(enable = "avx512f")]
12936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12937#[cfg_attr(test, assert_instr(vpmovdw))]
12938pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
12939    unsafe {
12940        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12941        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
12942    }
12943}
12944
12945/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12946///
12947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
12948#[inline]
12949#[target_feature(enable = "avx512f")]
12950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12951#[cfg_attr(test, assert_instr(vpmovdw))]
12952pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
12953    unsafe {
12954        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12955        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
12956    }
12957}
12958
12959/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
12962#[inline]
12963#[target_feature(enable = "avx512f,avx512vl")]
12964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12965#[cfg_attr(test, assert_instr(vpmovdw))]
12966pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
12967    unsafe {
12968        let a = a.as_i32x8();
12969        transmute::<i16x8, _>(simd_cast(a))
12970    }
12971}
12972
12973/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12974///
12975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
12976#[inline]
12977#[target_feature(enable = "avx512f,avx512vl")]
12978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12979#[cfg_attr(test, assert_instr(vpmovdw))]
12980pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12981    unsafe {
12982        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12983        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12984    }
12985}
12986
12987/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12988///
12989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
12990#[inline]
12991#[target_feature(enable = "avx512f,avx512vl")]
12992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12993#[cfg_attr(test, assert_instr(vpmovdw))]
12994pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
12995    unsafe {
12996        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12997        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12998    }
12999}
13000
13001/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13002///
13003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13004#[inline]
13005#[target_feature(enable = "avx512f,avx512vl")]
13006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13007#[cfg_attr(test, assert_instr(vpmovdw))]
13008pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
13009    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13010}
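
// Illustrative usage sketch (std assumed): the 128-bit form produces only four
// 16-bit results, and the upper 64 bits of the destination are zeroed, which is
// why the implementation above goes through the masked `vpmovdw` intrinsic
// rather than a plain vector cast.
#[cfg(target_arch = "x86_64")]
fn example_mm_cvtepi32_epi16() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm_setr_epi32(1, 2, 3, 4);
            let narrow = _mm_cvtepi32_epi16(a);
            let lanes: [i16; 8] = std::mem::transmute(narrow);
            assert_eq!(lanes, [1, 2, 3, 4, 0, 0, 0, 0]);
        }
    }
}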
13011
13012/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13013///
13014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13015#[inline]
13016#[target_feature(enable = "avx512f,avx512vl")]
13017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13018#[cfg_attr(test, assert_instr(vpmovdw))]
13019pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13020    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13021}
13022
13023/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13024///
13025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13026#[inline]
13027#[target_feature(enable = "avx512f,avx512vl")]
13028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13029#[cfg_attr(test, assert_instr(vpmovdw))]
13030pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13031    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13032}
13033
13034/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
13037#[inline]
13038#[target_feature(enable = "avx512f")]
13039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13040#[cfg_attr(test, assert_instr(vpmovdb))]
13041pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13042    unsafe {
13043        let a = a.as_i32x16();
13044        transmute::<i8x16, _>(simd_cast(a))
13045    }
13046}
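
// Illustrative usage sketch (std assumed, hypothetical function name): as with
// the 16-bit narrowing above, the conversion to 8 bits truncates rather than
// saturates.
#[cfg(target_arch = "x86_64")]
fn example_cvtepi32_epi8() {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm512_set1_epi32(0x0000_0180); // 384; the low byte is 0x80
            let narrow = _mm512_cvtepi32_epi8(a);
            let lanes: [i8; 16] = std::mem::transmute(narrow);
            assert!(lanes.iter().all(|&x| x == -128));
        }
    }
}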
13047
13048/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13049///
13050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13051#[inline]
13052#[target_feature(enable = "avx512f")]
13053#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13054#[cfg_attr(test, assert_instr(vpmovdb))]
13055pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13056    unsafe {
13057        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13058        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13059    }
13060}
13061
13062/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13063///
13064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13065#[inline]
13066#[target_feature(enable = "avx512f")]
13067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13068#[cfg_attr(test, assert_instr(vpmovdb))]
13069pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13070    unsafe {
13071        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13072        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13073    }
13074}
13075
13076/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13077///
13078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13079#[inline]
13080#[target_feature(enable = "avx512f,avx512vl")]
13081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13082#[cfg_attr(test, assert_instr(vpmovdb))]
13083pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
13084    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13085}
13086
13087/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13088///
13089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13090#[inline]
13091#[target_feature(enable = "avx512f,avx512vl")]
13092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13093#[cfg_attr(test, assert_instr(vpmovdb))]
13094pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13095    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13096}
13097
13098/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13099///
13100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13101#[inline]
13102#[target_feature(enable = "avx512f,avx512vl")]
13103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13104#[cfg_attr(test, assert_instr(vpmovdb))]
13105pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13106    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13107}
13108
13109/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13112#[inline]
13113#[target_feature(enable = "avx512f,avx512vl")]
13114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13115#[cfg_attr(test, assert_instr(vpmovdb))]
13116pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
13117    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13118}
13119
13120/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13121///
13122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13123#[inline]
13124#[target_feature(enable = "avx512f,avx512vl")]
13125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13126#[cfg_attr(test, assert_instr(vpmovdb))]
13127pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13128    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13129}
13130
13131/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13132///
13133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13134#[inline]
13135#[target_feature(enable = "avx512f,avx512vl")]
13136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13137#[cfg_attr(test, assert_instr(vpmovdb))]
13138pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13139    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13140}
13141
13142/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13143///
13144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
13145#[inline]
13146#[target_feature(enable = "avx512f")]
13147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13148#[cfg_attr(test, assert_instr(vpmovqd))]
13149pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13150    unsafe {
13151        let a = a.as_i64x8();
13152        transmute::<i32x8, _>(simd_cast(a))
13153    }
13154}
13155
13156/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13157///
13158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
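///
/// A writemask sketch under assumed conditions (`avx512f` enabled at compile
/// time; values chosen only for illustration):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi64(0x1_0000_0005); // low 32 bits are 5
/// let src = _mm256_set1_epi32(-1);
/// // Lanes 0..=3 take the truncated values, lanes 4..=7 are copied from `src`.
/// let r = _mm512_mask_cvtepi64_epi32(src, 0b0000_1111, a);
/// let mut out = [0i32; 8];
/// // SAFETY: `out` is 32 bytes and valid for writes.
/// unsafe { _mm256_storeu_si256(out.as_mut_ptr().cast(), r) };
/// assert_eq!(out, [5, 5, 5, 5, -1, -1, -1, -1]);
/// ```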
13159#[inline]
13160#[target_feature(enable = "avx512f")]
13161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13162#[cfg_attr(test, assert_instr(vpmovqd))]
13163pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13164    unsafe {
13165        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13166        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
13167    }
13168}
13169
13170/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13171///
13172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13173#[inline]
13174#[target_feature(enable = "avx512f")]
13175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13176#[cfg_attr(test, assert_instr(vpmovqd))]
13177pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13178    unsafe {
13179        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13180        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
13181    }
13182}
13183
13184/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13190#[cfg_attr(test, assert_instr(vpmovqd))]
13191pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13192    unsafe {
13193        let a = a.as_i64x4();
13194        transmute::<i32x4, _>(simd_cast(a))
13195    }
13196}
13197
13198/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13199///
13200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13201#[inline]
13202#[target_feature(enable = "avx512f,avx512vl")]
13203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13204#[cfg_attr(test, assert_instr(vpmovqd))]
13205pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13206    unsafe {
13207        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13208        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
13209    }
13210}
13211
13212/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13213///
13214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13215#[inline]
13216#[target_feature(enable = "avx512f,avx512vl")]
13217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13218#[cfg_attr(test, assert_instr(vpmovqd))]
13219pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13220    unsafe {
13221        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13222        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
13223    }
13224}
13225
13226/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13227///
13228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13229#[inline]
13230#[target_feature(enable = "avx512f,avx512vl")]
13231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13232#[cfg_attr(test, assert_instr(vpmovqd))]
13233pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13234    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13235}
13236
13237/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13238///
13239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13240#[inline]
13241#[target_feature(enable = "avx512f,avx512vl")]
13242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13243#[cfg_attr(test, assert_instr(vpmovqd))]
13244pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13245    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13246}
13247
13248/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13249///
13250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13251#[inline]
13252#[target_feature(enable = "avx512f,avx512vl")]
13253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13254#[cfg_attr(test, assert_instr(vpmovqd))]
13255pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13256    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13257}
13258
13259/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13260///
13261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
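///
/// A truncation sketch (illustrative only; assumes `avx512f` is enabled at
/// compile time):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 0x1_0007); // 0x1_0007 truncates to 7
/// let r = _mm512_cvtepi64_epi16(a);
/// let mut out = [0i16; 8];
/// // SAFETY: `out` is 16 bytes and valid for writes.
/// unsafe { _mm_storeu_si128(out.as_mut_ptr().cast(), r) };
/// assert_eq!(out, [0, 1, 2, 3, 4, 5, 6, 7]);
/// ```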
13262#[inline]
13263#[target_feature(enable = "avx512f")]
13264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13265#[cfg_attr(test, assert_instr(vpmovqw))]
13266pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13267    unsafe {
13268        let a = a.as_i64x8();
13269        transmute::<i16x8, _>(simd_cast(a))
13270    }
13271}
13272
13273/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13274///
13275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13276#[inline]
13277#[target_feature(enable = "avx512f")]
13278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13279#[cfg_attr(test, assert_instr(vpmovqw))]
13280pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13281    unsafe {
13282        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13283        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13284    }
13285}
13286
13287/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13288///
13289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13290#[inline]
13291#[target_feature(enable = "avx512f")]
13292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13293#[cfg_attr(test, assert_instr(vpmovqw))]
13294pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13295    unsafe {
13296        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13297        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13298    }
13299}
13300
13301/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13302///
13303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13304#[inline]
13305#[target_feature(enable = "avx512f,avx512vl")]
13306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13307#[cfg_attr(test, assert_instr(vpmovqw))]
13308pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13309    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13310}
13311
13312/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13313///
13314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13315#[inline]
13316#[target_feature(enable = "avx512f,avx512vl")]
13317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13318#[cfg_attr(test, assert_instr(vpmovqw))]
13319pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13320    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13321}
13322
13323/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13324///
13325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13326#[inline]
13327#[target_feature(enable = "avx512f,avx512vl")]
13328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13329#[cfg_attr(test, assert_instr(vpmovqw))]
13330pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13331    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13332}
13333
13334/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13335///
13336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13337#[inline]
13338#[target_feature(enable = "avx512f,avx512vl")]
13339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13340#[cfg_attr(test, assert_instr(vpmovqw))]
13341pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13342    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13343}
13344
13345/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13346///
13347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13348#[inline]
13349#[target_feature(enable = "avx512f,avx512vl")]
13350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13351#[cfg_attr(test, assert_instr(vpmovqw))]
13352pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13353    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13354}
13355
13356/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13357///
13358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13359#[inline]
13360#[target_feature(enable = "avx512f,avx512vl")]
13361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13362#[cfg_attr(test, assert_instr(vpmovqw))]
13363pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13364    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13365}
13366
13367/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13370#[inline]
13371#[target_feature(enable = "avx512f")]
13372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13373#[cfg_attr(test, assert_instr(vpmovqb))]
13374pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13375    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13376}
13377
13378/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13379///
13380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13381#[inline]
13382#[target_feature(enable = "avx512f")]
13383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13384#[cfg_attr(test, assert_instr(vpmovqb))]
13385pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13386    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
13387}
13388
13389/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13390///
13391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
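///
/// A zeromask sketch with assumed inputs (requires `avx512f` at compile time;
/// the mask and lane values are arbitrary):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi64(257); // 257 = 0x101; the low byte is 1
/// // Only lanes 2..=5 survive; all other result bytes are zeroed.
/// let r = _mm512_maskz_cvtepi64_epi8(0b0011_1100, a);
/// let mut out = [0i8; 16];
/// // SAFETY: `out` is 16 bytes and valid for writes.
/// unsafe { _mm_storeu_si128(out.as_mut_ptr().cast(), r) };
/// assert_eq!(&out[..8], &[0i8, 0, 1, 1, 1, 1, 0, 0]);
/// assert!(out[8..].iter().all(|&b| b == 0));
/// ```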
13392#[inline]
13393#[target_feature(enable = "avx512f")]
13394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13395#[cfg_attr(test, assert_instr(vpmovqb))]
13396pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13397    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
13398}
13399
13400/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13401///
13402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13403#[inline]
13404#[target_feature(enable = "avx512f,avx512vl")]
13405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13406#[cfg_attr(test, assert_instr(vpmovqb))]
13407pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13408    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13409}
13410
13411/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13412///
13413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13414#[inline]
13415#[target_feature(enable = "avx512f,avx512vl")]
13416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13417#[cfg_attr(test, assert_instr(vpmovqb))]
13418pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13419    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13420}
13421
13422/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13423///
13424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13425#[inline]
13426#[target_feature(enable = "avx512f,avx512vl")]
13427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13428#[cfg_attr(test, assert_instr(vpmovqb))]
13429pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13430    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13431}
13432
13433/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13434///
13435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13436#[inline]
13437#[target_feature(enable = "avx512f,avx512vl")]
13438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13439#[cfg_attr(test, assert_instr(vpmovqb))]
13440pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13441    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13442}
13443
13444/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13445///
13446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13447#[inline]
13448#[target_feature(enable = "avx512f,avx512vl")]
13449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13450#[cfg_attr(test, assert_instr(vpmovqb))]
13451pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13452    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13453}
13454
13455/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13456///
13457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13458#[inline]
13459#[target_feature(enable = "avx512f,avx512vl")]
13460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13461#[cfg_attr(test, assert_instr(vpmovqb))]
13462pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13463    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13464}
13465
13466/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13467///
13468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
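///
/// A saturation sketch with illustrative inputs (assumes `avx512f` is enabled at
/// compile time). Out-of-range lanes clamp to `i16::MIN`/`i16::MAX` instead of
/// wrapping as the truncating variant does.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_epi32(
///     100_000, -100_000, 1, -1, 0, 32_767, -32_768, 42,
///     100_000, -100_000, 1, -1, 0, 32_767, -32_768, 42,
/// );
/// let r = _mm512_cvtsepi32_epi16(a);
/// let mut out = [0i16; 16];
/// // SAFETY: `out` is 32 bytes and valid for writes.
/// unsafe { _mm256_storeu_si256(out.as_mut_ptr().cast(), r) };
/// assert_eq!(&out[..8], &[32_767i16, -32_768, 1, -1, 0, 32_767, -32_768, 42]);
/// ```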
13469#[inline]
13470#[target_feature(enable = "avx512f")]
13471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13472#[cfg_attr(test, assert_instr(vpmovsdw))]
13473pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13474    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
13475}
13476
13477/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13478///
13479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13480#[inline]
13481#[target_feature(enable = "avx512f")]
13482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13483#[cfg_attr(test, assert_instr(vpmovsdw))]
13484pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13485    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
13486}
13487
13488/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13489///
13490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1821)
13491#[inline]
13492#[target_feature(enable = "avx512f")]
13493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13494#[cfg_attr(test, assert_instr(vpmovsdw))]
13495pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13496    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
13497}
13498
13499/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13500///
13501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13502#[inline]
13503#[target_feature(enable = "avx512f,avx512vl")]
13504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13505#[cfg_attr(test, assert_instr(vpmovsdw))]
13506pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13507    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
13508}
13509
13510/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13511///
13512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13513#[inline]
13514#[target_feature(enable = "avx512f,avx512vl")]
13515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13516#[cfg_attr(test, assert_instr(vpmovsdw))]
13517pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13518    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
13519}
13520
13521/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13522///
13523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13524#[inline]
13525#[target_feature(enable = "avx512f,avx512vl")]
13526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13527#[cfg_attr(test, assert_instr(vpmovsdw))]
13528pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13529    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
13530}
13531
13532/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13533///
13534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13535#[inline]
13536#[target_feature(enable = "avx512f,avx512vl")]
13537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13538#[cfg_attr(test, assert_instr(vpmovsdw))]
13539pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13540    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13541}
13542
13543/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13544///
13545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13546#[inline]
13547#[target_feature(enable = "avx512f,avx512vl")]
13548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13549#[cfg_attr(test, assert_instr(vpmovsdw))]
13550pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13551    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13552}
13553
13554/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13555///
13556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13557#[inline]
13558#[target_feature(enable = "avx512f,avx512vl")]
13559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13560#[cfg_attr(test, assert_instr(vpmovsdw))]
13561pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13562    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13563}
13564
13565/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13566///
13567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13568#[inline]
13569#[target_feature(enable = "avx512f")]
13570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13571#[cfg_attr(test, assert_instr(vpmovsdb))]
13572pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13573    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
13574}
13575
13576/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13577///
13578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13579#[inline]
13580#[target_feature(enable = "avx512f")]
13581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13582#[cfg_attr(test, assert_instr(vpmovsdb))]
13583pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13584    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
13585}
13586
13587/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13588///
13589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13590#[inline]
13591#[target_feature(enable = "avx512f")]
13592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13593#[cfg_attr(test, assert_instr(vpmovsdb))]
13594pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13595    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
13596}
13597
13598/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13599///
13600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13601#[inline]
13602#[target_feature(enable = "avx512f,avx512vl")]
13603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13604#[cfg_attr(test, assert_instr(vpmovsdb))]
13605pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13606    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13607}
13608
13609/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13610///
13611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
13612#[inline]
13613#[target_feature(enable = "avx512f,avx512vl")]
13614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13615#[cfg_attr(test, assert_instr(vpmovsdb))]
13616pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13617    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13618}
13619
13620/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13621///
13622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
13623#[inline]
13624#[target_feature(enable = "avx512f,avx512vl")]
13625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13626#[cfg_attr(test, assert_instr(vpmovsdb))]
13627pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13628    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13629}
13630
13631/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13632///
13633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
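///
/// A short sketch with assumed inputs (requires `avx512f` and `avx512vl` at
/// compile time):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi32(1000, -1000, 7, -7);
/// let r = _mm_cvtsepi32_epi8(a);
/// let mut out = [0i8; 16];
/// // SAFETY: `out` is 16 bytes and valid for writes.
/// unsafe { _mm_storeu_si128(out.as_mut_ptr().cast(), r) };
/// // Four saturated bytes in the low 32 bits; the rest of the vector is zeroed.
/// assert_eq!(&out[..4], &[127i8, -128, 7, -7]);
/// assert!(out[4..].iter().all(|&b| b == 0));
/// ```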
13634#[inline]
13635#[target_feature(enable = "avx512f,avx512vl")]
13636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13637#[cfg_attr(test, assert_instr(vpmovsdb))]
13638pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
13639    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13640}
13641
13642/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13643///
13644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
13645#[inline]
13646#[target_feature(enable = "avx512f,avx512vl")]
13647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13648#[cfg_attr(test, assert_instr(vpmovsdb))]
13649pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13650    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13651}
13652
13653/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13654///
13655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
13656#[inline]
13657#[target_feature(enable = "avx512f,avx512vl")]
13658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13659#[cfg_attr(test, assert_instr(vpmovsdb))]
13660pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13661    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13662}
13663
13664/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13670#[cfg_attr(test, assert_instr(vpmovsqd))]
13671pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
13672    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
13673}
13674
13675/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13676///
13677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
13678#[inline]
13679#[target_feature(enable = "avx512f")]
13680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13681#[cfg_attr(test, assert_instr(vpmovsqd))]
13682pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13683    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
13684}
13685
13686/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13687///
13688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
13689#[inline]
13690#[target_feature(enable = "avx512f")]
13691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13692#[cfg_attr(test, assert_instr(vpmovsqd))]
13693pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13694    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
13695}
13696
13697/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13698///
13699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
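///
/// An illustrative sketch (assumes `avx512f` and `avx512vl` are enabled at
/// compile time; the inputs are arbitrary):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm256_setr_epi64x(5_000_000_000, -5_000_000_000, 3, -3);
/// let r = _mm256_cvtsepi64_epi32(a);
/// let mut out = [0i32; 4];
/// // SAFETY: `out` is 16 bytes and valid for writes.
/// unsafe { _mm_storeu_si128(out.as_mut_ptr().cast(), r) };
/// assert_eq!(out, [i32::MAX, i32::MIN, 3, -3]);
/// ```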
13700#[inline]
13701#[target_feature(enable = "avx512f,avx512vl")]
13702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13703#[cfg_attr(test, assert_instr(vpmovsqd))]
13704pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
13705    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
13706}
13707
13708/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13709///
13710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
13711#[inline]
13712#[target_feature(enable = "avx512f,avx512vl")]
13713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13714#[cfg_attr(test, assert_instr(vpmovsqd))]
13715pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13716    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
13717}
13718
13719/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13720///
13721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
13722#[inline]
13723#[target_feature(enable = "avx512f,avx512vl")]
13724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13725#[cfg_attr(test, assert_instr(vpmovsqd))]
13726pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13727    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
13728}
13729
13730/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13731///
13732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
13733#[inline]
13734#[target_feature(enable = "avx512f,avx512vl")]
13735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13736#[cfg_attr(test, assert_instr(vpmovsqd))]
13737pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
13738    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13739}
13740
13741/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13742///
13743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
13744#[inline]
13745#[target_feature(enable = "avx512f,avx512vl")]
13746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13747#[cfg_attr(test, assert_instr(vpmovsqd))]
13748pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13749    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13750}
13751
13752/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13753///
13754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
13755#[inline]
13756#[target_feature(enable = "avx512f,avx512vl")]
13757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13758#[cfg_attr(test, assert_instr(vpmovsqd))]
13759pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13760    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13761}
13762
13763/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13764///
13765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
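///
/// A saturation sketch with assumed inputs (requires `avx512f` at compile time):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_epi64(1_000_000, -1_000_000, 2, -2, 0, 1, -1, 40_000);
/// let r = _mm512_cvtsepi64_epi16(a);
/// let mut out = [0i16; 8];
/// // SAFETY: `out` is 16 bytes and valid for writes.
/// unsafe { _mm_storeu_si128(out.as_mut_ptr().cast(), r) };
/// assert_eq!(out, [32_767, -32_768, 2, -2, 0, 1, -1, 32_767]);
/// ```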
13766#[inline]
13767#[target_feature(enable = "avx512f")]
13768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13769#[cfg_attr(test, assert_instr(vpmovsqw))]
13770pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
13771    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
13772}
13773
13774/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13775///
13776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
13777#[inline]
13778#[target_feature(enable = "avx512f")]
13779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13780#[cfg_attr(test, assert_instr(vpmovsqw))]
13781pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13782    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
13783}
13784
13785/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13786///
13787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
13788#[inline]
13789#[target_feature(enable = "avx512f")]
13790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13791#[cfg_attr(test, assert_instr(vpmovsqw))]
13792pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13793    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
13794}
13795
13796/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13797///
13798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
13799#[inline]
13800#[target_feature(enable = "avx512f,avx512vl")]
13801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13802#[cfg_attr(test, assert_instr(vpmovsqw))]
13803pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
13804    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13805}
13806
13807/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13808///
13809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
13810#[inline]
13811#[target_feature(enable = "avx512f,avx512vl")]
13812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13813#[cfg_attr(test, assert_instr(vpmovsqw))]
13814pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13815    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13816}
13817
13818/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13819///
13820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
13821#[inline]
13822#[target_feature(enable = "avx512f,avx512vl")]
13823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13824#[cfg_attr(test, assert_instr(vpmovsqw))]
13825pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13826    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13827}
13828
13829/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13830///
13831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
13832#[inline]
13833#[target_feature(enable = "avx512f,avx512vl")]
13834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13835#[cfg_attr(test, assert_instr(vpmovsqw))]
13836pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
13837    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13838}
13839
13840/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13841///
13842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
13843#[inline]
13844#[target_feature(enable = "avx512f,avx512vl")]
13845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13846#[cfg_attr(test, assert_instr(vpmovsqw))]
13847pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13848    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13849}
13850
13851/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13852///
13853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
13854#[inline]
13855#[target_feature(enable = "avx512f,avx512vl")]
13856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13857#[cfg_attr(test, assert_instr(vpmovsqw))]
13858pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13859    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13860}
13861
13862/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13863///
13864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
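///
/// A minimal sketch, with assumed inputs and `avx512f` enabled at compile time:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_epi64(200, -200, 1, -1, 0, 127, -128, 99);
/// let r = _mm512_cvtsepi64_epi8(a);
/// let mut out = [0i8; 16];
/// // SAFETY: `out` is 16 bytes and valid for writes.
/// unsafe { _mm_storeu_si128(out.as_mut_ptr().cast(), r) };
/// // Eight saturated bytes in the low half; the upper half is zeroed.
/// assert_eq!(&out[..8], &[127i8, -128, 1, -1, 0, 127, -128, 99]);
/// assert!(out[8..].iter().all(|&b| b == 0));
/// ```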
13865#[inline]
13866#[target_feature(enable = "avx512f")]
13867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13868#[cfg_attr(test, assert_instr(vpmovsqb))]
13869pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
13870    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13871}
13872
13873/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13874///
13875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
13876#[inline]
13877#[target_feature(enable = "avx512f")]
13878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13879#[cfg_attr(test, assert_instr(vpmovsqb))]
13880pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13881    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
13882}
13883
13884/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13885///
13886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
13887#[inline]
13888#[target_feature(enable = "avx512f")]
13889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13890#[cfg_attr(test, assert_instr(vpmovsqb))]
13891pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13892    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
13893}
13894
13895/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13896///
13897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
13898#[inline]
13899#[target_feature(enable = "avx512f,avx512vl")]
13900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13901#[cfg_attr(test, assert_instr(vpmovsqb))]
13902pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
13903    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13904}
13905
13906/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13907///
13908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
13909#[inline]
13910#[target_feature(enable = "avx512f,avx512vl")]
13911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13912#[cfg_attr(test, assert_instr(vpmovsqb))]
13913pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13914    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13915}
13916
13917/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13918///
13919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
13920#[inline]
13921#[target_feature(enable = "avx512f,avx512vl")]
13922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13923#[cfg_attr(test, assert_instr(vpmovsqb))]
13924pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13925    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13926}
13927
13928/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13929///
13930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
13931#[inline]
13932#[target_feature(enable = "avx512f,avx512vl")]
13933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13934#[cfg_attr(test, assert_instr(vpmovsqb))]
13935pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
13936    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13937}
13938
13939/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13940///
13941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
13942#[inline]
13943#[target_feature(enable = "avx512f,avx512vl")]
13944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13945#[cfg_attr(test, assert_instr(vpmovsqb))]
13946pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13947    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13948}
13949
13950/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13951///
13952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
13953#[inline]
13954#[target_feature(enable = "avx512f,avx512vl")]
13955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13956#[cfg_attr(test, assert_instr(vpmovsqb))]
13957pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13958    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13959}
13960
13961/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13962///
13963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
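///
/// An unsigned-saturation sketch with illustrative inputs (assumes `avx512f` is
/// enabled at compile time). Lanes are read as unsigned 32-bit values, so `-1`
/// reinterprets as `u32::MAX` and saturates to `u16::MAX`.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_epi32(
///     65_535, 65_536, 1, -1, 0, 100_000, 7, 42,
///     65_535, 65_536, 1, -1, 0, 100_000, 7, 42,
/// );
/// let r = _mm512_cvtusepi32_epi16(a);
/// let mut out = [0u16; 16];
/// // SAFETY: `out` is 32 bytes and valid for writes.
/// unsafe { _mm256_storeu_si256(out.as_mut_ptr().cast(), r) };
/// assert_eq!(&out[..8], &[65_535u16, 65_535, 1, 65_535, 0, 65_535, 7, 42]);
/// ```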
13964#[inline]
13965#[target_feature(enable = "avx512f")]
13966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13967#[cfg_attr(test, assert_instr(vpmovusdw))]
13968pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
13969    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
13970}
13971
13972/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13973///
13974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
13975#[inline]
13976#[target_feature(enable = "avx512f")]
13977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13978#[cfg_attr(test, assert_instr(vpmovusdw))]
13979pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13980    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
13981}
13982
13983/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13984///
13985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
13986#[inline]
13987#[target_feature(enable = "avx512f")]
13988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13989#[cfg_attr(test, assert_instr(vpmovusdw))]
13990pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13991    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
13992}
13993
13994/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13995///
13996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
13997#[inline]
13998#[target_feature(enable = "avx512f,avx512vl")]
13999#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14000#[cfg_attr(test, assert_instr(vpmovusdw))]
14001pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14002    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
14003}
14004
14005/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14006///
14007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14008#[inline]
14009#[target_feature(enable = "avx512f,avx512vl")]
14010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14011#[cfg_attr(test, assert_instr(vpmovusdw))]
14012pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14013    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
14014}
14015
14016/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14017///
14018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14019#[inline]
14020#[target_feature(enable = "avx512f,avx512vl")]
14021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14022#[cfg_attr(test, assert_instr(vpmovusdw))]
14023pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14024    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14025}
14026
14027/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14028///
14029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14030#[inline]
14031#[target_feature(enable = "avx512f,avx512vl")]
14032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14033#[cfg_attr(test, assert_instr(vpmovusdw))]
14034pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14035    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14036}
14037
14038/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14039///
14040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14041#[inline]
14042#[target_feature(enable = "avx512f,avx512vl")]
14043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14044#[cfg_attr(test, assert_instr(vpmovusdw))]
14045pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14046    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14047}
14048
14049/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14050///
14051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14052#[inline]
14053#[target_feature(enable = "avx512f,avx512vl")]
14054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14055#[cfg_attr(test, assert_instr(vpmovusdw))]
14056pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14057    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14058}
14059
14060/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14061///
14062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
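///
/// A minimal usage sketch (not part of the original documentation), assuming `avx512f` is
/// enabled for the calling code; marked `ignore` because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_epi32(300); // exceeds u8::MAX
/// let r = _mm512_cvtusepi32_epi8(a);
/// let lanes: [u8; 16] = unsafe { core::mem::transmute(r) };
/// assert!(lanes.iter().all(|&x| x == u8::MAX)); // every lane saturates to 255
/// ```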
14063#[inline]
14064#[target_feature(enable = "avx512f")]
14065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14066#[cfg_attr(test, assert_instr(vpmovusdb))]
14067pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14068    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14069}
14070
14071/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14072///
14073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14074#[inline]
14075#[target_feature(enable = "avx512f")]
14076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14077#[cfg_attr(test, assert_instr(vpmovusdb))]
14078pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14079    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14080}
14081
14082/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14083///
14084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14085#[inline]
14086#[target_feature(enable = "avx512f")]
14087#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14088#[cfg_attr(test, assert_instr(vpmovusdb))]
14089pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14090    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14091}
14092
14093/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14094///
14095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14096#[inline]
14097#[target_feature(enable = "avx512f,avx512vl")]
14098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14099#[cfg_attr(test, assert_instr(vpmovusdb))]
14100pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14101    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14102}
14103
14104/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14105///
14106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14107#[inline]
14108#[target_feature(enable = "avx512f,avx512vl")]
14109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14110#[cfg_attr(test, assert_instr(vpmovusdb))]
14111pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14112    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14113}
14114
14115/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14116///
14117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14118#[inline]
14119#[target_feature(enable = "avx512f,avx512vl")]
14120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14121#[cfg_attr(test, assert_instr(vpmovusdb))]
14122pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14123    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14124}
14125
14126/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14127///
14128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14129#[inline]
14130#[target_feature(enable = "avx512f,avx512vl")]
14131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14132#[cfg_attr(test, assert_instr(vpmovusdb))]
14133pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14134    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14135}
14136
14137/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14140#[inline]
14141#[target_feature(enable = "avx512f,avx512vl")]
14142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14143#[cfg_attr(test, assert_instr(vpmovusdb))]
14144pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14145    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14146}
14147
14148/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14149///
14150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14151#[inline]
14152#[target_feature(enable = "avx512f,avx512vl")]
14153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14154#[cfg_attr(test, assert_instr(vpmovusdb))]
14155pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14156    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14157}
14158
14159/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14160///
14161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
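///
/// A minimal usage sketch (not part of the original documentation), assuming `avx512f` is
/// enabled for the calling code; marked `ignore` because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_epi64(u64::MAX as i64); // exceeds u32::MAX
/// let r = _mm512_cvtusepi64_epi32(a);
/// let lanes: [u32; 8] = unsafe { core::mem::transmute(r) };
/// assert!(lanes.iter().all(|&x| x == u32::MAX)); // every lane saturates
/// ```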
14162#[inline]
14163#[target_feature(enable = "avx512f")]
14164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14165#[cfg_attr(test, assert_instr(vpmovusqd))]
14166pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14167    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14168}
14169
14170/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14171///
14172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14173#[inline]
14174#[target_feature(enable = "avx512f")]
14175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14176#[cfg_attr(test, assert_instr(vpmovusqd))]
14177pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14178    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14179}
14180
14181/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182///
14183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14184#[inline]
14185#[target_feature(enable = "avx512f")]
14186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14187#[cfg_attr(test, assert_instr(vpmovusqd))]
14188pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14189    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14190}
14191
14192/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14193///
14194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14195#[inline]
14196#[target_feature(enable = "avx512f,avx512vl")]
14197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14198#[cfg_attr(test, assert_instr(vpmovusqd))]
14199pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14200    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14201}
14202
14203/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14204///
14205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14206#[inline]
14207#[target_feature(enable = "avx512f,avx512vl")]
14208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14209#[cfg_attr(test, assert_instr(vpmovusqd))]
14210pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14211    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14212}
14213
14214/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14215///
14216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14217#[inline]
14218#[target_feature(enable = "avx512f,avx512vl")]
14219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14220#[cfg_attr(test, assert_instr(vpmovusqd))]
14221pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14222    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14223}
14224
14225/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14226///
14227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14228#[inline]
14229#[target_feature(enable = "avx512f,avx512vl")]
14230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14231#[cfg_attr(test, assert_instr(vpmovusqd))]
14232pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14233    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14234}
14235
14236/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14239#[inline]
14240#[target_feature(enable = "avx512f,avx512vl")]
14241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14242#[cfg_attr(test, assert_instr(vpmovusqd))]
14243pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14244    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14245}
14246
14247/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14248///
14249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14250#[inline]
14251#[target_feature(enable = "avx512f,avx512vl")]
14252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14253#[cfg_attr(test, assert_instr(vpmovusqd))]
14254pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14255    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14256}
14257
14258/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14259///
14260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
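///
/// A minimal usage sketch (not part of the original documentation), assuming `avx512f` is
/// enabled for the calling code; marked `ignore` because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_epi64(70_000); // exceeds u16::MAX
/// let r = _mm512_cvtusepi64_epi16(a);
/// let lanes: [u16; 8] = unsafe { core::mem::transmute(r) };
/// assert!(lanes.iter().all(|&x| x == u16::MAX)); // every lane saturates
/// ```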
14261#[inline]
14262#[target_feature(enable = "avx512f")]
14263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14264#[cfg_attr(test, assert_instr(vpmovusqw))]
14265pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14266    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
14267}
14268
14269/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14270///
14271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14272#[inline]
14273#[target_feature(enable = "avx512f")]
14274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14275#[cfg_attr(test, assert_instr(vpmovusqw))]
14276pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14277    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
14278}
14279
14280/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14281///
14282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14283#[inline]
14284#[target_feature(enable = "avx512f")]
14285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14286#[cfg_attr(test, assert_instr(vpmovusqw))]
14287pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14288    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
14289}
14290
14291/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14292///
14293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14294#[inline]
14295#[target_feature(enable = "avx512f,avx512vl")]
14296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14297#[cfg_attr(test, assert_instr(vpmovusqw))]
14298pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14299    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
14300}
14301
14302/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14303///
14304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14305#[inline]
14306#[target_feature(enable = "avx512f,avx512vl")]
14307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14308#[cfg_attr(test, assert_instr(vpmovusqw))]
14309pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14310    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
14311}
14312
14313/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14314///
14315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14316#[inline]
14317#[target_feature(enable = "avx512f,avx512vl")]
14318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14319#[cfg_attr(test, assert_instr(vpmovusqw))]
14320pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14321    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
14322}
14323
14324/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14327#[inline]
14328#[target_feature(enable = "avx512f,avx512vl")]
14329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14330#[cfg_attr(test, assert_instr(vpmovusqw))]
14331pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14332    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
14333}
14334
14335/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14336///
14337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14338#[inline]
14339#[target_feature(enable = "avx512f,avx512vl")]
14340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14341#[cfg_attr(test, assert_instr(vpmovusqw))]
14342pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14343    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
14344}
14345
14346/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14347///
14348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14349#[inline]
14350#[target_feature(enable = "avx512f,avx512vl")]
14351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14352#[cfg_attr(test, assert_instr(vpmovusqw))]
14353pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14354    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
14355}
14356
14357/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14358///
14359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
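///
/// A minimal usage sketch (not part of the original documentation), assuming `avx512f` is
/// enabled for the calling code; marked `ignore` because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_epi64(300); // exceeds u8::MAX
/// let r = _mm512_cvtusepi64_epi8(a);
/// let lanes: [u8; 16] = unsafe { core::mem::transmute(r) };
/// assert!(lanes[..8].iter().all(|&x| x == u8::MAX)); // low 8 lanes saturate
/// assert!(lanes[8..].iter().all(|&x| x == 0));       // upper bytes of dst are zeroed
/// ```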
14360#[inline]
14361#[target_feature(enable = "avx512f")]
14362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14363#[cfg_attr(test, assert_instr(vpmovusqb))]
14364pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14365    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
14366}
14367
14368/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14369///
14370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14371#[inline]
14372#[target_feature(enable = "avx512f")]
14373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14374#[cfg_attr(test, assert_instr(vpmovusqb))]
14375pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14376    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
14377}
14378
14379/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14382#[inline]
14383#[target_feature(enable = "avx512f")]
14384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14385#[cfg_attr(test, assert_instr(vpmovusqb))]
14386pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14387    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
14388}
14389
14390/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14391///
14392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14393#[inline]
14394#[target_feature(enable = "avx512f,avx512vl")]
14395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14396#[cfg_attr(test, assert_instr(vpmovusqb))]
14397pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14398    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
14399}
14400
14401/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14402///
14403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14404#[inline]
14405#[target_feature(enable = "avx512f,avx512vl")]
14406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14407#[cfg_attr(test, assert_instr(vpmovusqb))]
14408pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14409    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
14410}
14411
14412/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14413///
14414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14415#[inline]
14416#[target_feature(enable = "avx512f,avx512vl")]
14417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14418#[cfg_attr(test, assert_instr(vpmovusqb))]
14419pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14420    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
14421}
14422
14423/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14424///
14425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14426#[inline]
14427#[target_feature(enable = "avx512f,avx512vl")]
14428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14429#[cfg_attr(test, assert_instr(vpmovusqb))]
14430pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14431    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
14432}
14433
14434/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14435///
14436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14437#[inline]
14438#[target_feature(enable = "avx512f,avx512vl")]
14439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14440#[cfg_attr(test, assert_instr(vpmovusqb))]
14441pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14442    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
14443}
14444
14445/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14446///
14447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14448#[inline]
14449#[target_feature(enable = "avx512f,avx512vl")]
14450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14451#[cfg_attr(test, assert_instr(vpmovusqb))]
14452pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14453    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
14454}
14455
14456/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14457///
14458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14464///
14465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
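///
/// A minimal sketch of how the rounding constant is passed (not part of the original
/// documentation), assuming `avx512f` is enabled for the calling code; marked `ignore`
/// because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.5);
/// // Round toward negative infinity: 2.5 -> 2
/// let down = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// // Round toward positive infinity: 2.5 -> 3
/// let up = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
/// let d: [i32; 16] = unsafe { core::mem::transmute(down) };
/// let u: [i32; 16] = unsafe { core::mem::transmute(up) };
/// assert!(d.iter().all(|&x| x == 2) && u.iter().all(|&x| x == 3));
/// ```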
14466#[inline]
14467#[target_feature(enable = "avx512f")]
14468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14469#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14470#[rustc_legacy_const_generics(1)]
14471pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14472    unsafe {
14473        static_assert_rounding!(ROUNDING);
14474        let a = a.as_f32x16();
14475        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
14476        transmute(r)
14477    }
14478}
14479
14480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14481///
14482/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14488///
14489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
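///
/// A minimal sketch of the writemask behaviour (not part of the original documentation),
/// assuming `avx512f` is enabled for the calling code; marked `ignore` because it needs
/// AVX-512 hardware to run:
///
/// ```ignore
/// const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// let src = _mm512_set1_epi32(-1);
/// let a = _mm512_set1_ps(7.0);
/// let k: __mmask16 = 0b0000_0000_0000_0001; // convert only element 0
/// let r = _mm512_mask_cvt_roundps_epi32::<R>(src, k, a);
/// let lanes: [i32; 16] = unsafe { core::mem::transmute(r) };
/// assert_eq!(lanes[0], 7);                      // masked-in: converted
/// assert!(lanes[1..].iter().all(|&x| x == -1)); // masked-out: copied from src
/// ```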
14490#[inline]
14491#[target_feature(enable = "avx512f")]
14492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14493#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14494#[rustc_legacy_const_generics(3)]
14495pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14496    src: __m512i,
14497    k: __mmask16,
14498    a: __m512,
14499) -> __m512i {
14500    unsafe {
14501        static_assert_rounding!(ROUNDING);
14502        let a = a.as_f32x16();
14503        let src = src.as_i32x16();
14504        let r = vcvtps2dq(a, src, k, ROUNDING);
14505        transmute(r)
14506    }
14507}
14508
14509/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14510///
14511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14517///
14518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14519#[inline]
14520#[target_feature(enable = "avx512f")]
14521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14522#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14523#[rustc_legacy_const_generics(2)]
14524pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14525    unsafe {
14526        static_assert_rounding!(ROUNDING);
14527        let a = a.as_f32x16();
14528        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
14529        transmute(r)
14530    }
14531}
14532
14533/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14534///
14535/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14536/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14537/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14538/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14539/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14540/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14541///
14542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
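///
/// A minimal usage sketch (not part of the original documentation), assuming `avx512f` is
/// enabled for the calling code; marked `ignore` because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_ps(3.7);
/// // Truncate toward zero: 3.7 -> 3
/// let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
/// let lanes: [u32; 16] = unsafe { core::mem::transmute(r) };
/// assert!(lanes.iter().all(|&x| x == 3));
/// ```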
14543#[inline]
14544#[target_feature(enable = "avx512f")]
14545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14546#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14547#[rustc_legacy_const_generics(1)]
14548pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14549    unsafe {
14550        static_assert_rounding!(ROUNDING);
14551        let a = a.as_f32x16();
14552        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
14553        transmute(r)
14554    }
14555}
14556
14557/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14558///
14559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14565///
14566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14567#[inline]
14568#[target_feature(enable = "avx512f")]
14569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14570#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14571#[rustc_legacy_const_generics(3)]
14572pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14573    src: __m512i,
14574    k: __mmask16,
14575    a: __m512,
14576) -> __m512i {
14577    unsafe {
14578        static_assert_rounding!(ROUNDING);
14579        let a = a.as_f32x16();
14580        let src = src.as_u32x16();
14581        let r = vcvtps2udq(a, src, k, ROUNDING);
14582        transmute(r)
14583    }
14584}
14585
14586/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14587///
14588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14594///
14595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14596#[inline]
14597#[target_feature(enable = "avx512f")]
14598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14599#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14600#[rustc_legacy_const_generics(2)]
14601pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14602    unsafe {
14603        static_assert_rounding!(ROUNDING);
14604        let a = a.as_f32x16();
14605        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
14606        transmute(r)
14607    }
14608}
14609
14610/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
14611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
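///
/// A minimal usage sketch (not part of the original documentation), assuming `avx512f` is
/// enabled for the calling code; marked `ignore` because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm256_set1_ps(1.5);
/// // Widening f32 -> f64 is exact, so only exception suppression matters here.
/// let r = _mm512_cvt_roundps_pd::<_MM_FROUND_NO_EXC>(a);
/// let lanes: [f64; 8] = unsafe { core::mem::transmute(r) };
/// assert!(lanes.iter().all(|&x| x == 1.5));
/// ```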
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14617#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14618#[rustc_legacy_const_generics(1)]
14619pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
14620    unsafe {
14621        static_assert_sae!(SAE);
14622        let a = a.as_f32x8();
14623        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
14624        transmute(r)
14625    }
14626}
14627
14628/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
14632#[inline]
14633#[target_feature(enable = "avx512f")]
14634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14635#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14636#[rustc_legacy_const_generics(3)]
14637pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
14638    unsafe {
14639        static_assert_sae!(SAE);
14640        let a = a.as_f32x8();
14641        let src = src.as_f64x8();
14642        let r = vcvtps2pd(a, src, k, SAE);
14643        transmute(r)
14644    }
14645}
14646
14647/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14648/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14649///
14650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
14651#[inline]
14652#[target_feature(enable = "avx512f")]
14653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14654#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14655#[rustc_legacy_const_generics(2)]
14656pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
14657    unsafe {
14658        static_assert_sae!(SAE);
14659        let a = a.as_f32x8();
14660        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
14661        transmute(r)
14662    }
14663}
14664
14665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14666///
14667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14673///
14674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
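///
/// A minimal usage sketch (not part of the original documentation), assuming `avx512f` is
/// enabled for the calling code; marked `ignore` because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_pd(-2.5);
/// // Round to nearest (ties to even): -2.5 -> -2
/// let r = _mm512_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// let lanes: [i32; 8] = unsafe { core::mem::transmute(r) };
/// assert!(lanes.iter().all(|&x| x == -2));
/// ```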
14675#[inline]
14676#[target_feature(enable = "avx512f")]
14677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14679#[rustc_legacy_const_generics(1)]
14680pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14681    unsafe {
14682        static_assert_rounding!(ROUNDING);
14683        let a = a.as_f64x8();
14684        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
14685        transmute(r)
14686    }
14687}
14688
14689/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14690///
14691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14697///
14698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14699#[inline]
14700#[target_feature(enable = "avx512f")]
14701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14702#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14703#[rustc_legacy_const_generics(3)]
14704pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
14705    src: __m256i,
14706    k: __mmask8,
14707    a: __m512d,
14708) -> __m256i {
14709    unsafe {
14710        static_assert_rounding!(ROUNDING);
14711        let a = a.as_f64x8();
14712        let src = src.as_i32x8();
14713        let r = vcvtpd2dq(a, src, k, ROUNDING);
14714        transmute(r)
14715    }
14716}
14717
14718/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14719///
14720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14726///
14727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14728#[inline]
14729#[target_feature(enable = "avx512f")]
14730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14731#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14732#[rustc_legacy_const_generics(2)]
14733pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14734    unsafe {
14735        static_assert_rounding!(ROUNDING);
14736        let a = a.as_f64x8();
14737        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
14738        transmute(r)
14739    }
14740}
14741
14742/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14743///
14744/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14745/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14746/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14747/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14748/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14749/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14750///
14751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
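///
/// A minimal usage sketch (not part of the original documentation), assuming `avx512f` is
/// enabled for the calling code; marked `ignore` because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_pd(4_000_000_000.0); // fits in u32 but not in i32
/// let r = _mm512_cvt_roundpd_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// let lanes: [u32; 8] = unsafe { core::mem::transmute(r) };
/// assert!(lanes.iter().all(|&x| x == 4_000_000_000));
/// ```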
14752#[inline]
14753#[target_feature(enable = "avx512f")]
14754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14755#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14756#[rustc_legacy_const_generics(1)]
14757pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14758    unsafe {
14759        static_assert_rounding!(ROUNDING);
14760        let a = a.as_f64x8();
14761        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
14762        transmute(r)
14763    }
14764}
14765
14766/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14767///
14768/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14769/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14770/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14771/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14772/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14773/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14776#[inline]
14777#[target_feature(enable = "avx512f")]
14778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14779#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14780#[rustc_legacy_const_generics(3)]
14781pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
14782    src: __m256i,
14783    k: __mmask8,
14784    a: __m512d,
14785) -> __m256i {
14786    unsafe {
14787        static_assert_rounding!(ROUNDING);
14788        let a = a.as_f64x8();
14789        let src = src.as_u32x8();
14790        let r = vcvtpd2udq(a, src, k, ROUNDING);
14791        transmute(r)
14792    }
14793}
14794
14795/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14796///
14797/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14798/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14799/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14800/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14801/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14802/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14803///
14804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14805#[inline]
14806#[target_feature(enable = "avx512f")]
14807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14808#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14809#[rustc_legacy_const_generics(2)]
14810pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14811    unsafe {
14812        static_assert_rounding!(ROUNDING);
14813        let a = a.as_f64x8();
14814        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
14815        transmute(r)
14816    }
14817}
14818
14819/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14820///
14821/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14822/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14823/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14824/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14825/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14826/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14827///
14828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
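///
/// A minimal usage sketch (not part of the original documentation), assuming `avx512f` is
/// enabled for the calling code; marked `ignore` because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_pd(1.25); // exactly representable as f32
/// let r = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// let lanes: [f32; 8] = unsafe { core::mem::transmute(r) };
/// assert!(lanes.iter().all(|&x| x == 1.25));
/// ```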
14829#[inline]
14830#[target_feature(enable = "avx512f")]
14831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14832#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14833#[rustc_legacy_const_generics(1)]
14834pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
14835    unsafe {
14836        static_assert_rounding!(ROUNDING);
14837        let a = a.as_f64x8();
14838        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
14839        transmute(r)
14840    }
14841}
14842
14843/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14844///
14845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14851///
14852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14853#[inline]
14854#[target_feature(enable = "avx512f")]
14855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14856#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14857#[rustc_legacy_const_generics(3)]
14858pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
14859    src: __m256,
14860    k: __mmask8,
14861    a: __m512d,
14862) -> __m256 {
14863    unsafe {
14864        static_assert_rounding!(ROUNDING);
14865        let a = a.as_f64x8();
14866        let src = src.as_f32x8();
14867        let r = vcvtpd2ps(a, src, k, ROUNDING);
14868        transmute(r)
14869    }
14870}
14871
14872/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14873///
14874/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14880///
14881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14882#[inline]
14883#[target_feature(enable = "avx512f")]
14884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14885#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14886#[rustc_legacy_const_generics(2)]
14887pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
14888    unsafe {
14889        static_assert_rounding!(ROUNDING);
14890        let a = a.as_f64x8();
14891        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
14892        transmute(r)
14893    }
14894}
14895
14896/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14897///
14898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
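///
/// A minimal sketch showing when the rounding mode is observable (not part of the original
/// documentation), assuming `avx512f` is enabled for the calling code; marked `ignore`
/// because it needs AVX-512 hardware to run:
///
/// ```ignore
/// let a = _mm512_set1_epi32(16_777_217); // 2^24 + 1, not exactly representable as f32
/// // Rounding toward zero picks the f32 closer to zero: 16_777_216.0
/// let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
/// let lanes: [f32; 16] = unsafe { core::mem::transmute(r) };
/// assert!(lanes.iter().all(|&x| x == 16_777_216.0));
/// ```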
14906#[inline]
14907#[target_feature(enable = "avx512f")]
14908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14909#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14910#[rustc_legacy_const_generics(1)]
14911pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14912    unsafe {
14913        static_assert_rounding!(ROUNDING);
14914        let a = a.as_i32x16();
14915        let r = vcvtdq2ps(a, ROUNDING);
14916        transmute(r)
14917    }
14918}
14919
14920/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14921///
14922/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14928///
14929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
14930#[inline]
14931#[target_feature(enable = "avx512f")]
14932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14933#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14934#[rustc_legacy_const_generics(3)]
14935pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
14936    src: __m512,
14937    k: __mmask16,
14938    a: __m512i,
14939) -> __m512 {
14940    unsafe {
14941        static_assert_rounding!(ROUNDING);
14942        let a = a.as_i32x16();
14943        let r = vcvtdq2ps(a, ROUNDING);
14944        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
14945    }
14946}
14947
14948/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14949///
14950/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14956///
14957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
14958#[inline]
14959#[target_feature(enable = "avx512f")]
14960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14961#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14962#[rustc_legacy_const_generics(2)]
14963pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
14964    unsafe {
14965        static_assert_rounding!(ROUNDING);
14966        let a = a.as_i32x16();
14967        let r = vcvtdq2ps(a, ROUNDING);
14968        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
14969    }
14970}
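
// Editorial usage sketch, not part of the upstream file: the ROUNDING parameter only
// becomes observable when the i32 value is not exactly representable as f32, e.g. 2^24 + 1.
// Assumes the crate's cfg(test) helpers (`simd_test`, `assert_eq_m512`); names are illustrative.
#[cfg(test)]
mod example_cvt_roundepi32_ps {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn cvt_roundepi32_ps_sketch() {
        // 2^24 + 1 cannot be represented exactly as f32, so the rounding mode is visible.
        let a = _mm512_set1_epi32(16_777_217);
        let nearest =
            _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let up = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
        assert_eq_m512(nearest, _mm512_set1_ps(16_777_216.0));
        assert_eq_m512(up, _mm512_set1_ps(16_777_218.0));
    }
}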
14971
14972/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14973///
14974/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
14982#[inline]
14983#[target_feature(enable = "avx512f")]
14984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14985#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14986#[rustc_legacy_const_generics(1)]
14987pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14988    unsafe {
14989        static_assert_rounding!(ROUNDING);
14990        let a = a.as_u32x16();
14991        let r = vcvtudq2ps(a, ROUNDING);
14992        transmute(r)
14993    }
14994}
14995
14996/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14997///
14998/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14999/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15000/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15001/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15002/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15004///
15005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15006#[inline]
15007#[target_feature(enable = "avx512f")]
15008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15009#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15010#[rustc_legacy_const_generics(3)]
15011pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15012    src: __m512,
15013    k: __mmask16,
15014    a: __m512i,
15015) -> __m512 {
15016    unsafe {
15017        static_assert_rounding!(ROUNDING);
15018        let a = a.as_u32x16();
15019        let r = vcvtudq2ps(a, ROUNDING);
15020        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
15021    }
15022}
15023
15024/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15025///
15026/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15027/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15028/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15029/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15030/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15031/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15032///
15033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15034#[inline]
15035#[target_feature(enable = "avx512f")]
15036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15037#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15038#[rustc_legacy_const_generics(2)]
15039pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15040    unsafe {
15041        static_assert_rounding!(ROUNDING);
15042        let a = a.as_u32x16();
15043        let r = vcvtudq2ps(a, ROUNDING);
15044        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
15045    }
15046}
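
// Editorial usage sketch, not part of the upstream file: unlike the epi32 variant, the input
// lanes are treated as unsigned, so values above i32::MAX convert without wrapping. Assumes
// the crate's cfg(test) helpers (`simd_test`, `assert_eq_m512`); names are illustrative.
#[cfg(test)]
mod example_maskz_cvt_roundepu32_ps {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn maskz_cvt_roundepu32_ps_sketch() {
        // 4_000_000_000 is above i32::MAX but exactly representable as f32.
        let a = _mm512_set1_epi32(4_000_000_000u32 as i32);
        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_00000011,
            a,
        );
        // Only lanes 0 and 1 are selected; the remaining fourteen lanes are zeroed.
        let e = _mm512_setr_ps(
            4_000_000_000.0, 4_000_000_000.0, 0., 0., 0., 0., 0., 0.,
            0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
}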
15047
15048/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15049/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15050///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15051///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15052///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15053///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15054///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15055///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15056///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15057///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15058///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15059///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15060///
15061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
15062#[inline]
15063#[target_feature(enable = "avx512f")]
15064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15065#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15066#[rustc_legacy_const_generics(1)]
15067pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15068    unsafe {
15069        static_assert_extended_rounding!(ROUNDING);
15070        let a = a.as_f32x16();
15071        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
15072        transmute(r)
15073    }
15074}
15075
15076/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15077/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15078///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15079///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15080///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15081///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15082///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15083///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15084///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15085///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15086///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15087///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15088///
15089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15090#[inline]
15091#[target_feature(enable = "avx512f")]
15092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15093#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15094#[rustc_legacy_const_generics(3)]
15095pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
15096    src: __m256i,
15097    k: __mmask16,
15098    a: __m512,
15099) -> __m256i {
15100    unsafe {
15101        static_assert_extended_rounding!(ROUNDING);
15102        let a = a.as_f32x16();
15103        let src = src.as_i16x16();
15104        let r = vcvtps2ph(a, ROUNDING, src, k);
15105        transmute(r)
15106    }
15107}
15108
15109/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15110/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15111///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15112///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15113///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15114///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15115///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15116///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15117///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15118///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15119///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15120///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15121///
15122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15123#[inline]
15124#[target_feature(enable = "avx512f")]
15125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15126#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15127#[rustc_legacy_const_generics(2)]
15128pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15129    unsafe {
15130        static_assert_extended_rounding!(ROUNDING);
15131        let a = a.as_f32x16();
15132        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
15133        transmute(r)
15134    }
15135}
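
// Editorial usage sketch, not part of the upstream file: every f32 used here is exactly
// representable as an IEEE binary16 value, so converting to f16 and back is lossless and the
// round trip can be checked without hand-computing f16 bit patterns. Assumes the crate's
// cfg(test) helpers (`simd_test`, `assert_eq_m512`); names are illustrative.
#[cfg(test)]
mod example_cvt_roundps_ph {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn cvt_roundps_ph_roundtrip_sketch() {
        let a = _mm512_setr_ps(
            -2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5,
            2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0,
        );
        let h = _mm512_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let back = _mm512_cvt_roundph_ps::<{ _MM_FROUND_NO_EXC }>(h);
        assert_eq_m512(back, a);
    }
}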
15136
15137/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15138/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:
15139/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15140/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15141/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15142/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15143/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15144///
15145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
15146#[inline]
15147#[target_feature(enable = "avx512f,avx512vl")]
15148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15149#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15150#[rustc_legacy_const_generics(3)]
15151pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15152    src: __m128i,
15153    k: __mmask8,
15154    a: __m256,
15155) -> __m128i {
15156    unsafe {
15157        static_assert_uimm_bits!(IMM8, 8);
15158        let a = a.as_f32x8();
15159        let src = src.as_i16x8();
15160        let r = vcvtps2ph256(a, IMM8, src, k);
15161        transmute(r)
15162    }
15163}
15164
15165/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15166/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15167/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15168/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15169/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15170/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15171/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15172///
15173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15174#[inline]
15175#[target_feature(enable = "avx512f,avx512vl")]
15176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15177#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15178#[rustc_legacy_const_generics(2)]
15179pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15180    unsafe {
15181        static_assert_uimm_bits!(IMM8, 8);
15182        let a = a.as_f32x8();
15183        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15184        transmute(r)
15185    }
15186}
15187
15188/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15189/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15190/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15191/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15192/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15193/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15194/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15195///
15196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15197#[inline]
15198#[target_feature(enable = "avx512f,avx512vl")]
15199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15200#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15201#[rustc_legacy_const_generics(3)]
15202pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15203    unsafe {
15204        static_assert_uimm_bits!(IMM8, 8);
15205        let a = a.as_f32x4();
15206        let src = src.as_i16x8();
15207        let r = vcvtps2ph128(a, IMM8, src, k);
15208        transmute(r)
15209    }
15210}
15211
15212/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15213/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15214/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15215/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15216/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15217/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15218/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15219///
15220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15221#[inline]
15222#[target_feature(enable = "avx512f,avx512vl")]
15223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15224#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15225#[rustc_legacy_const_generics(2)]
15226pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15227    unsafe {
15228        static_assert_uimm_bits!(IMM8, 8);
15229        let a = a.as_f32x4();
15230        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15231        transmute(r)
15232    }
15233}
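
// Editorial usage sketch, not part of the upstream file: the 128-bit form writes four f16
// values into the low 64 bits of the result and zeroes the rest; masked-off lanes are also
// zeroed here. The expected f16 bit patterns (0.5 = 0x3800, 2.0 = 0x4000) are standard IEEE
// binary16 encodings. Assumes the crate's cfg(test) helpers; names are illustrative.
#[cfg(test)]
mod example_maskz_cvt_roundps_ph_128 {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn maskz_cvt_roundps_ph_128_sketch() {
        let a = _mm_setr_ps(0.5, 1.0, 2.0, 4.0);
        // Lanes 0 and 2 are converted; lanes 1 and 3 are zeroed by the mask.
        let r = _mm_maskz_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101, a,
        );
        let e = _mm_setr_epi16(0x3800, 0, 0x4000, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
}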
15234
15235/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15236/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15237///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15238///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15239///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15240///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15241///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15242///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15243///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15244///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15245///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15246///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15247///
15248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15249#[inline]
15250#[target_feature(enable = "avx512f")]
15251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15252#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15253#[rustc_legacy_const_generics(1)]
15254pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15255    unsafe {
15256        static_assert_extended_rounding!(ROUNDING);
15257        let a = a.as_f32x16();
15258        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
15259        transmute(r)
15260    }
15261}
15262
15263/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15264/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15265///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15266///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15267///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15268///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15269///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15270///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15271///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15272///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15273///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15274///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15275///
15276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15277#[inline]
15278#[target_feature(enable = "avx512f")]
15279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15280#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15281#[rustc_legacy_const_generics(3)]
15282pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15283    unsafe {
15284        static_assert_extended_rounding!(ROUNDING);
15285        let a = a.as_f32x16();
15286        let src = src.as_i16x16();
15287        let r = vcvtps2ph(a, ROUNDING, src, k);
15288        transmute(r)
15289    }
15290}
15291
15292/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15293/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15294///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15295///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15296///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15297///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15298///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15299///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15300///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15301///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15302///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15303///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15304///
15305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15306#[inline]
15307#[target_feature(enable = "avx512f")]
15308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15309#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15310#[rustc_legacy_const_generics(2)]
15311pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15312    unsafe {
15313        static_assert_extended_rounding!(ROUNDING);
15314        let a = a.as_f32x16();
15315        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
15316        transmute(r)
15317    }
15318}
15319
15320/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15321/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15322/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15323/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15324/// * [`_MM_FROUND_TO_POS_INF`] : round up
15325/// * [`_MM_FROUND_TO_ZERO`] : truncate
15326/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15327///
15328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15329#[inline]
15330#[target_feature(enable = "avx512f,avx512vl")]
15331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15332#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15333#[rustc_legacy_const_generics(3)]
15334pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15335    unsafe {
15336        static_assert_uimm_bits!(IMM8, 8);
15337        let a = a.as_f32x8();
15338        let src = src.as_i16x8();
15339        let r = vcvtps2ph256(a, IMM8, src, k);
15340        transmute(r)
15341    }
15342}
15343
15344/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15345/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15346/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15347/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15348/// * [`_MM_FROUND_TO_POS_INF`] : round up
15349/// * [`_MM_FROUND_TO_ZERO`] : truncate
15350/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15351///
15352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15353#[inline]
15354#[target_feature(enable = "avx512f,avx512vl")]
15355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15356#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15357#[rustc_legacy_const_generics(2)]
15358pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15359    unsafe {
15360        static_assert_uimm_bits!(IMM8, 8);
15361        let a = a.as_f32x8();
15362        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15363        transmute(r)
15364    }
15365}
15366
15367/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15368/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15369/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15370/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15371/// * [`_MM_FROUND_TO_POS_INF`] : round up
15372/// * [`_MM_FROUND_TO_ZERO`] : truncate
15373/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15374///
15375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15376#[inline]
15377#[target_feature(enable = "avx512f,avx512vl")]
15378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15379#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15380#[rustc_legacy_const_generics(3)]
15381pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15382    unsafe {
15383        static_assert_uimm_bits!(IMM8, 8);
15384        let a = a.as_f32x4();
15385        let src = src.as_i16x8();
15386        let r = vcvtps2ph128(a, IMM8, src, k);
15387        transmute(r)
15388    }
15389}
15390
15391/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15392/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15393/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15394/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15395/// * [`_MM_FROUND_TO_POS_INF`] : round up
15396/// * [`_MM_FROUND_TO_ZERO`] : truncate
15397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15398///
15399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15400#[inline]
15401#[target_feature(enable = "avx512f,avx512vl")]
15402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15403#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15404#[rustc_legacy_const_generics(2)]
15405pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15406    unsafe {
15407        static_assert_uimm_bits!(IMM8, 8);
15408        let a = a.as_f32x4();
15409        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15410        transmute(r)
15411    }
15412}
15413
15414/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15415/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15416///
15417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
15418#[inline]
15419#[target_feature(enable = "avx512f")]
15420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15421#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15422#[rustc_legacy_const_generics(1)]
15423pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15424    unsafe {
15425        static_assert_sae!(SAE);
15426        let a = a.as_i16x16();
15427        let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
15428        transmute(r)
15429    }
15430}
15431
15432/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15433/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15434///
15435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15436#[inline]
15437#[target_feature(enable = "avx512f")]
15438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15439#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15440#[rustc_legacy_const_generics(3)]
15441pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15442    unsafe {
15443        static_assert_sae!(SAE);
15444        let a = a.as_i16x16();
15445        let src = src.as_f32x16();
15446        let r = vcvtph2ps(a, src, k, SAE);
15447        transmute(r)
15448    }
15449}
15450
15451/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15452/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15453///
15454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15455#[inline]
15456#[target_feature(enable = "avx512f")]
15457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15458#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15459#[rustc_legacy_const_generics(2)]
15460pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15461    unsafe {
15462        static_assert_sae!(SAE);
15463        let a = a.as_i16x16();
15464        let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
15465        transmute(r)
15466    }
15467}
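
// Editorial usage sketch, not part of the upstream file: widens f16 lanes to f32 while
// suppressing exceptions, zeroing the upper half of the result via the mask. 0x3C00 is the
// IEEE binary16 encoding of 1.0. Assumes the crate's cfg(test) helpers; names are illustrative.
#[cfg(test)]
mod example_maskz_cvt_roundph_ps {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn maskz_cvt_roundph_ps_sketch() {
        let a = _mm256_set1_epi16(0x3C00);
        let r = _mm512_maskz_cvt_roundph_ps::<{ _MM_FROUND_NO_EXC }>(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1.,
            0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
}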
15468
15469/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15470///
15471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
15472#[inline]
15473#[target_feature(enable = "avx512f")]
15474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15475#[cfg_attr(test, assert_instr(vcvtph2ps))]
15476pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15477    unsafe {
15478        transmute(vcvtph2ps(
15479            a.as_i16x16(),
15480            f32x16::ZERO,
15481            0b11111111_11111111,
15482            _MM_FROUND_NO_EXC,
15483        ))
15484    }
15485}
15486
15487/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15488///
15489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15490#[inline]
15491#[target_feature(enable = "avx512f")]
15492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15493#[cfg_attr(test, assert_instr(vcvtph2ps))]
15494pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15495    unsafe {
15496        transmute(vcvtph2ps(
15497            a.as_i16x16(),
15498            src.as_f32x16(),
15499            k,
15500            _MM_FROUND_NO_EXC,
15501        ))
15502    }
15503}
15504
15505/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15506///
15507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15508#[inline]
15509#[target_feature(enable = "avx512f")]
15510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15511#[cfg_attr(test, assert_instr(vcvtph2ps))]
15512pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15513    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
15514}
15515
15516/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15517///
15518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15519#[inline]
15520#[target_feature(enable = "avx512f,avx512vl")]
15521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15522#[cfg_attr(test, assert_instr(vcvtph2ps))]
15523pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15524    unsafe {
15525        let convert = _mm256_cvtph_ps(a);
15526        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
15527    }
15528}
15529
15530/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15531///
15532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15533#[inline]
15534#[target_feature(enable = "avx512f,avx512vl")]
15535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15536#[cfg_attr(test, assert_instr(vcvtph2ps))]
15537pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15538    unsafe {
15539        let convert = _mm256_cvtph_ps(a);
15540        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
15541    }
15542}
15543
15544/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15545///
15546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15547#[inline]
15548#[target_feature(enable = "avx512f,avx512vl")]
15549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15550#[cfg_attr(test, assert_instr(vcvtph2ps))]
15551pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15552    unsafe {
15553        let convert = _mm_cvtph_ps(a);
15554        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
15555    }
15556}
15557
15558/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15559///
15560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15561#[inline]
15562#[target_feature(enable = "avx512f,avx512vl")]
15563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15564#[cfg_attr(test, assert_instr(vcvtph2ps))]
15565pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15566    unsafe {
15567        let convert = _mm_cvtph_ps(a);
15568        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
15569    }
15570}
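
// Editorial usage sketch, not part of the upstream file: the 128-bit write-masked form keeps
// `src` lanes where the mask bit is clear. The f16 encodings used (1.0 = 0x3C00, 2.0 = 0x4000,
// 4.0 = 0x4400, 8.0 = 0x4800) are standard IEEE binary16 values. Assumes the crate's cfg(test)
// helpers; names are illustrative.
#[cfg(test)]
mod example_mask_cvtph_ps_128 {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn mask_cvtph_ps_128_sketch() {
        let a = _mm_setr_epi16(0x3C00, 0x4000, 0x4400, 0x4800, 0, 0, 0, 0);
        let src = _mm_set1_ps(-1.0);
        // Only lanes 0 and 3 are converted; lanes 1 and 2 keep the src values.
        let r = _mm_mask_cvtph_ps(src, 0b1001, a);
        let e = _mm_setr_ps(1.0, -1.0, -1.0, 8.0);
        assert_eq_m128(r, e);
    }
}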
15571
15572/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15573/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15574///
15575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
15576#[inline]
15577#[target_feature(enable = "avx512f")]
15578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15579#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15580#[rustc_legacy_const_generics(1)]
15581pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15582    unsafe {
15583        static_assert_sae!(SAE);
15584        let a = a.as_f32x16();
15585        let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
15586        transmute(r)
15587    }
15588}
15589
15590/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15591/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15592///
15593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15594#[inline]
15595#[target_feature(enable = "avx512f")]
15596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15597#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15598#[rustc_legacy_const_generics(3)]
15599pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15600    src: __m512i,
15601    k: __mmask16,
15602    a: __m512,
15603) -> __m512i {
15604    unsafe {
15605        static_assert_sae!(SAE);
15606        let a = a.as_f32x16();
15607        let src = src.as_i32x16();
15608        let r = vcvttps2dq(a, src, k, SAE);
15609        transmute(r)
15610    }
15611}
15612
15613/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15614/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15615///
15616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
15617#[inline]
15618#[target_feature(enable = "avx512f")]
15619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15620#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15621#[rustc_legacy_const_generics(2)]
15622pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15623    unsafe {
15624        static_assert_sae!(SAE);
15625        let a = a.as_f32x16();
15626        let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
15627        transmute(r)
15628    }
15629}
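
// Editorial usage sketch, not part of the upstream file: the "cvtt" forms always truncate
// toward zero, independently of MXCSR.RC; SAE only controls exception suppression. Assumes
// the crate's cfg(test) helpers (`simd_test`, `assert_eq_m512i`); names are illustrative.
#[cfg(test)]
mod example_cvtt_roundps_epi32 {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn cvtt_roundps_epi32_sketch() {
        let a = _mm512_setr_ps(
            1.9, -1.9, 2.5, -2.5, 0.4, -0.4, 7.99, -7.99,
            1.9, -1.9, 2.5, -2.5, 0.4, -0.4, 7.99, -7.99,
        );
        let r = _mm512_cvtt_roundps_epi32::<{ _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(1, -1, 2, -2, 0, 0, 7, -7, 1, -1, 2, -2, 0, 0, 7, -7);
        assert_eq_m512i(r, e);
    }
}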
15630
15631/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15632/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15633///
15634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
15635#[inline]
15636#[target_feature(enable = "avx512f")]
15637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15638#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15639#[rustc_legacy_const_generics(1)]
15640pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
15641    unsafe {
15642        static_assert_sae!(SAE);
15643        let a = a.as_f32x16();
15644        let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
15645        transmute(r)
15646    }
15647}
15648
15649/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15650/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15651///
15652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
15653#[inline]
15654#[target_feature(enable = "avx512f")]
15655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15656#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15657#[rustc_legacy_const_generics(3)]
15658pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
15659    src: __m512i,
15660    k: __mmask16,
15661    a: __m512,
15662) -> __m512i {
15663    unsafe {
15664        static_assert_sae!(SAE);
15665        let a = a.as_f32x16();
15666        let src = src.as_u32x16();
15667        let r = vcvttps2udq(a, src, k, SAE);
15668        transmute(r)
15669    }
15670}
15671
15672/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15673/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15674///
15675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
15676#[inline]
15677#[target_feature(enable = "avx512f")]
15678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15679#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15680#[rustc_legacy_const_generics(2)]
15681pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15682    unsafe {
15683        static_assert_sae!(SAE);
15684        let a = a.as_f32x16();
15685        let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
15686        transmute(r)
15687    }
15688}
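
// Editorial usage sketch, not part of the upstream file: the unsigned form accepts values
// above i32::MAX as long as they fit in a u32 lane. Assumes the crate's cfg(test) helpers;
// names are illustrative.
#[cfg(test)]
mod example_cvtt_roundps_epu32 {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn cvtt_roundps_epu32_sketch() {
        // 4e9 is exactly representable as f32 and only fits in an unsigned 32-bit lane.
        let a = _mm512_set1_ps(4_000_000_000.0);
        let r = _mm512_cvtt_roundps_epu32::<{ _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set1_epi32(4_000_000_000u32 as i32);
        assert_eq_m512i(r, e);
    }
}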
15689
15690/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15691/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15692///
15693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
15694#[inline]
15695#[target_feature(enable = "avx512f")]
15696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15697#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15698#[rustc_legacy_const_generics(1)]
15699pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
15700    unsafe {
15701        static_assert_sae!(SAE);
15702        let a = a.as_f64x8();
15703        let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
15704        transmute(r)
15705    }
15706}
15707
15708/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15709/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15710///
15711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
15712#[inline]
15713#[target_feature(enable = "avx512f")]
15714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15715#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15716#[rustc_legacy_const_generics(3)]
15717pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
15718    src: __m256i,
15719    k: __mmask8,
15720    a: __m512d,
15721) -> __m256i {
15722    unsafe {
15723        static_assert_sae!(SAE);
15724        let a = a.as_f64x8();
15725        let src = src.as_i32x8();
15726        let r = vcvttpd2dq(a, src, k, SAE);
15727        transmute(r)
15728    }
15729}
15730
15731/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15732/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15733///
15734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1906)
15735#[inline]
15736#[target_feature(enable = "avx512f")]
15737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15738#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15739#[rustc_legacy_const_generics(2)]
15740pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15741    unsafe {
15742        static_assert_sae!(SAE);
15743        let a = a.as_f64x8();
15744        let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
15745        transmute(r)
15746    }
15747}
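
// Editorial usage sketch, not part of the upstream file: eight f64 lanes truncate down to an
// __m256i of 32-bit integers, with `src` lanes preserved where the mask bit is clear. Assumes
// the crate's cfg(test) helpers (`simd_test`, `assert_eq_m256i`); names are illustrative.
#[cfg(test)]
mod example_mask_cvtt_roundpd_epi32 {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mask_cvtt_roundpd_epi32_sketch() {
        let a = _mm512_setr_pd(1.7, -1.7, 2.2, -2.2, 3.9, -3.9, 4.5, -4.5);
        let src = _mm256_set1_epi32(99);
        // Even lanes are truncated toward zero; odd lanes keep the src value.
        let r = _mm512_mask_cvtt_roundpd_epi32::<{ _MM_FROUND_NO_EXC }>(src, 0b0101_0101, a);
        let e = _mm256_setr_epi32(1, 99, 2, 99, 3, 99, 4, 99);
        assert_eq_m256i(r, e);
    }
}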
15748
15749/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15750/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15751///
15752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
15753#[inline]
15754#[target_feature(enable = "avx512f")]
15755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15756#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15757#[rustc_legacy_const_generics(1)]
15758pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
15759    unsafe {
15760        static_assert_sae!(SAE);
15761        let a = a.as_f64x8();
15762        let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
15763        transmute(r)
15764    }
15765}
15766
15767/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15769///
15770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
15771#[inline]
15772#[target_feature(enable = "avx512f")]
15773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15774#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15775#[rustc_legacy_const_generics(3)]
15776pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
15777    src: __m256i,
15778    k: __mmask8,
15779    a: __m512d,
15780) -> __m256i {
15781    unsafe {
15782        static_assert_sae!(SAE);
15783        let a = a.as_f64x8();
15784        let src = src.as_i32x8();
15785        let r = vcvttpd2udq(a, src, k, SAE);
15786        transmute(r)
15787    }
15788}
15789
15790/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15791///
15792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
15793#[inline]
15794#[target_feature(enable = "avx512f")]
15795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15796#[cfg_attr(test, assert_instr(vcvttps2dq))]
15797pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
15798    unsafe {
15799        transmute(vcvttps2dq(
15800            a.as_f32x16(),
15801            i32x16::ZERO,
15802            0b11111111_11111111,
15803            _MM_FROUND_CUR_DIRECTION,
15804        ))
15805    }
15806}
15807
15808/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15809///
15810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
15811#[inline]
15812#[target_feature(enable = "avx512f")]
15813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15814#[cfg_attr(test, assert_instr(vcvttps2dq))]
15815pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15816    unsafe {
15817        transmute(vcvttps2dq(
15818            a.as_f32x16(),
15819            src.as_i32x16(),
15820            k,
15821            _MM_FROUND_CUR_DIRECTION,
15822        ))
15823    }
15824}
15825
15826/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15827///
15828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
15829#[inline]
15830#[target_feature(enable = "avx512f")]
15831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15832#[cfg_attr(test, assert_instr(vcvttps2dq))]
15833pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
15834    unsafe {
15835        transmute(vcvttps2dq(
15836            a.as_f32x16(),
15837            i32x16::ZERO,
15838            k,
15839            _MM_FROUND_CUR_DIRECTION,
15840        ))
15841    }
15842}
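
// Editorial usage sketch, not part of the upstream file: the non-"round" form uses the current
// MXCSR state, but truncation toward zero applies regardless. Assumes the crate's cfg(test)
// helpers (`simd_test`, `assert_eq_m512i`); names are illustrative.
#[cfg(test)]
mod example_mask_cvttps_epi32 {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mask_cvttps_epi32_sketch() {
        let a = _mm512_set1_ps(-3.75);
        let src = _mm512_set1_epi32(7);
        // Truncation drops the fraction toward zero: -3.75 becomes -3 in the selected lanes.
        let r = _mm512_mask_cvttps_epi32(src, 0b11111111_00000000, a);
        let e = _mm512_setr_epi32(7, 7, 7, 7, 7, 7, 7, 7, -3, -3, -3, -3, -3, -3, -3, -3);
        assert_eq_m512i(r, e);
    }
}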
15843
15844/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15845///
15846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
15847#[inline]
15848#[target_feature(enable = "avx512f,avx512vl")]
15849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15850#[cfg_attr(test, assert_instr(vcvttps2dq))]
15851pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15852    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
15853}
15854
15855/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15856///
15857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
15858#[inline]
15859#[target_feature(enable = "avx512f,avx512vl")]
15860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15861#[cfg_attr(test, assert_instr(vcvttps2dq))]
15862pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
15863    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
15864}
15865
15866/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15867///
15868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
15869#[inline]
15870#[target_feature(enable = "avx512f,avx512vl")]
15871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15872#[cfg_attr(test, assert_instr(vcvttps2dq))]
15873pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15874    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
15875}
15876
15877/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15878///
15879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
15880#[inline]
15881#[target_feature(enable = "avx512f,avx512vl")]
15882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15883#[cfg_attr(test, assert_instr(vcvttps2dq))]
15884pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
15885    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
15886}
15887
15888/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15889///
15890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
15891#[inline]
15892#[target_feature(enable = "avx512f")]
15893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15894#[cfg_attr(test, assert_instr(vcvttps2udq))]
15895pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
15896    unsafe {
15897        transmute(vcvttps2udq(
15898            a.as_f32x16(),
15899            u32x16::ZERO,
15900            0b11111111_11111111,
15901            _MM_FROUND_CUR_DIRECTION,
15902        ))
15903    }
15904}
15905
15906/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15907///
15908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
15909#[inline]
15910#[target_feature(enable = "avx512f")]
15911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15912#[cfg_attr(test, assert_instr(vcvttps2udq))]
15913pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15914    unsafe {
15915        transmute(vcvttps2udq(
15916            a.as_f32x16(),
15917            src.as_u32x16(),
15918            k,
15919            _MM_FROUND_CUR_DIRECTION,
15920        ))
15921    }
15922}
15923
15924/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15925///
15926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
15927#[inline]
15928#[target_feature(enable = "avx512f")]
15929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15930#[cfg_attr(test, assert_instr(vcvttps2udq))]
15931pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
15932    unsafe {
15933        transmute(vcvttps2udq(
15934            a.as_f32x16(),
15935            u32x16::ZERO,
15936            k,
15937            _MM_FROUND_CUR_DIRECTION,
15938        ))
15939    }
15940}
15941
15942/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15943///
15944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
15945#[inline]
15946#[target_feature(enable = "avx512f,avx512vl")]
15947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15948#[cfg_attr(test, assert_instr(vcvttps2udq))]
15949pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
15950    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
15951}
15952
15953/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15954///
15955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15956#[inline]
15957#[target_feature(enable = "avx512f,avx512vl")]
15958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15959#[cfg_attr(test, assert_instr(vcvttps2udq))]
15960pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15961    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
15962}
15963
15964/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15965///
15966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15967#[inline]
15968#[target_feature(enable = "avx512f,avx512vl")]
15969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15970#[cfg_attr(test, assert_instr(vcvttps2udq))]
15971pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
15972    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
15973}
15974
15975/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15976///
15977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
15978#[inline]
15979#[target_feature(enable = "avx512f,avx512vl")]
15980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15981#[cfg_attr(test, assert_instr(vcvttps2udq))]
15982pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
15983    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
15984}
15985
15986/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15987///
15988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15989#[inline]
15990#[target_feature(enable = "avx512f,avx512vl")]
15991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15992#[cfg_attr(test, assert_instr(vcvttps2udq))]
15993pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15994    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
15995}
15996
15997/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15998///
15999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
16000#[inline]
16001#[target_feature(enable = "avx512f,avx512vl")]
16002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16003#[cfg_attr(test, assert_instr(vcvttps2udq))]
16004pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
16005    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
16006}
16007
16008/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16009/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16010///
16011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
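///
/// A minimal sketch showing the `SAE` const parameter, assuming runtime AVX-512F detection:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_pd(2.9);
///         // Suppress floating-point exceptions and convert only the low 4 lanes.
///         let r = _mm512_maskz_cvtt_roundpd_epu32::<{ _MM_FROUND_NO_EXC }>(0b00001111, a);
///         let lanes: [u32; 8] = core::mem::transmute(r);
///         assert_eq!(lanes, [2, 2, 2, 2, 0, 0, 0, 0]);
///     }
/// }
/// # }
/// ```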
16012#[inline]
16013#[target_feature(enable = "avx512f")]
16014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16015#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16016#[rustc_legacy_const_generics(2)]
16017pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16018    unsafe {
16019        static_assert_sae!(SAE);
16020        let a = a.as_f64x8();
16021        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
16022        transmute(r)
16023    }
16024}
16025
16026/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16027///
16028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
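///
/// A minimal usage sketch, assuming runtime AVX-512F detection (truncation is toward zero):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_setr_pd(1.9, -1.9, 2.5, -2.5, 0.0, 7.7, -0.5, 3.0);
///         let r = _mm512_cvttpd_epi32(a);
///         let lanes: [i32; 8] = core::mem::transmute(r);
///         assert_eq!(lanes, [1, -1, 2, -2, 0, 7, 0, 3]);
///     }
/// }
/// # }
/// ```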
16029#[inline]
16030#[target_feature(enable = "avx512f")]
16031#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16032#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16033pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
16034    unsafe {
16035        transmute(vcvttpd2dq(
16036            a.as_f64x8(),
16037            i32x8::ZERO,
16038            0b11111111,
16039            _MM_FROUND_CUR_DIRECTION,
16040        ))
16041    }
16042}
16043
16044/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16045///
16046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
16047#[inline]
16048#[target_feature(enable = "avx512f")]
16049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16050#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16051pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16052    unsafe {
16053        transmute(vcvttpd2dq(
16054            a.as_f64x8(),
16055            src.as_i32x8(),
16056            k,
16057            _MM_FROUND_CUR_DIRECTION,
16058        ))
16059    }
16060}
16061
16062/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16063///
16064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16065#[inline]
16066#[target_feature(enable = "avx512f")]
16067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16068#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16069pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16070    unsafe {
16071        transmute(vcvttpd2dq(
16072            a.as_f64x8(),
16073            i32x8::ZERO,
16074            k,
16075            _MM_FROUND_CUR_DIRECTION,
16076        ))
16077    }
16078}
16079
16080/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16081///
16082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16083#[inline]
16084#[target_feature(enable = "avx512f,avx512vl")]
16085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16086#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16087pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16088    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16089}
16090
16091/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16092///
16093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16094#[inline]
16095#[target_feature(enable = "avx512f,avx512vl")]
16096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16097#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16098pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
16099    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16100}
16101
16102/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16103///
16104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16105#[inline]
16106#[target_feature(enable = "avx512f,avx512vl")]
16107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16108#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16109pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16110    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16111}
16112
16113/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16114///
16115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16116#[inline]
16117#[target_feature(enable = "avx512f,avx512vl")]
16118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16119#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16120pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
16121    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16122}
16123
16124/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16125///
16126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16127#[inline]
16128#[target_feature(enable = "avx512f")]
16129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16130#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16131pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16132    unsafe {
16133        transmute(vcvttpd2udq(
16134            a.as_f64x8(),
16135            i32x8::ZERO,
16136            0b11111111,
16137            _MM_FROUND_CUR_DIRECTION,
16138        ))
16139    }
16140}
16141
16142/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16143///
16144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16145#[inline]
16146#[target_feature(enable = "avx512f")]
16147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16148#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16149pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16150    unsafe {
16151        transmute(vcvttpd2udq(
16152            a.as_f64x8(),
16153            src.as_i32x8(),
16154            k,
16155            _MM_FROUND_CUR_DIRECTION,
16156        ))
16157    }
16158}
16159
16160/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16161///
16162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16163#[inline]
16164#[target_feature(enable = "avx512f")]
16165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16166#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16167pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16168    unsafe {
16169        transmute(vcvttpd2udq(
16170            a.as_f64x8(),
16171            i32x8::ZERO,
16172            k,
16173            _MM_FROUND_CUR_DIRECTION,
16174        ))
16175    }
16176}
16177
16178/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16179///
16180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16181#[inline]
16182#[target_feature(enable = "avx512f,avx512vl")]
16183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16184#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16185pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
16186    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
16187}
16188
16189/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16190///
16191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16192#[inline]
16193#[target_feature(enable = "avx512f,avx512vl")]
16194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16195#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16196pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16197    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
16198}
16199
16200/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16201///
16202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16203#[inline]
16204#[target_feature(enable = "avx512f,avx512vl")]
16205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16206#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16207pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
16208    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
16209}
16210
16211/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16212///
16213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16214#[inline]
16215#[target_feature(enable = "avx512f,avx512vl")]
16216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16217#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16218pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
16219    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
16220}
16221
16222/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16223///
16224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16225#[inline]
16226#[target_feature(enable = "avx512f,avx512vl")]
16227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16228#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16229pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16230    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
16231}
16232
16233/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16234///
16235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16236#[inline]
16237#[target_feature(enable = "avx512f,avx512vl")]
16238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16239#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16240pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
16241    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
16242}
16243
16244/// Returns vector of type `__m512d` with all elements set to zero.
16245///
16246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16247#[inline]
16248#[target_feature(enable = "avx512f")]
16249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16250#[cfg_attr(test, assert_instr(vxorps))]
16251pub fn _mm512_setzero_pd() -> __m512d {
16252    // All-0 is a properly initialized __m512d
16253    unsafe { const { mem::zeroed() } }
16254}
16255
16256/// Returns vector of type `__m512` with all elements set to zero.
16257///
16258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16259#[inline]
16260#[target_feature(enable = "avx512f")]
16261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16262#[cfg_attr(test, assert_instr(vxorps))]
16263pub fn _mm512_setzero_ps() -> __m512 {
16264    // All-0 is a properly initialized __m512
16265    unsafe { const { mem::zeroed() } }
16266}
16267
16268/// Returns vector of type `__m512` with all elements set to zero.
16269///
16270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16271#[inline]
16272#[target_feature(enable = "avx512f")]
16273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16274#[cfg_attr(test, assert_instr(vxorps))]
16275pub fn _mm512_setzero() -> __m512 {
16276    // All-0 is a properly initialized __m512
16277    unsafe { const { mem::zeroed() } }
16278}
16279
16280/// Returns vector of type `__m512i` with all elements set to zero.
16281///
16282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16283#[inline]
16284#[target_feature(enable = "avx512f")]
16285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16286#[cfg_attr(test, assert_instr(vxorps))]
16287pub fn _mm512_setzero_si512() -> __m512i {
16288    // All-0 is a properly initialized __m512i
16289    unsafe { const { mem::zeroed() } }
16290}
16291
16292/// Returns vector of type `__m512i` with all elements set to zero.
16293///
16294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16295#[inline]
16296#[target_feature(enable = "avx512f")]
16297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16298#[cfg_attr(test, assert_instr(vxorps))]
16299pub fn _mm512_setzero_epi32() -> __m512i {
16300    // All-0 is a properly initialized __m512i
16301    unsafe { const { mem::zeroed() } }
16302}
16303
16304/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16305/// order.
16306///
16307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
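///
/// A minimal sketch of the lane ordering, assuming runtime AVX-512F detection
/// (the first argument lands in the lowest lane):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///         let lanes: [i32; 16] = core::mem::transmute(a);
///         assert_eq!(lanes, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
///     }
/// }
/// # }
/// ```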
16308#[inline]
16309#[target_feature(enable = "avx512f")]
16310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16311pub fn _mm512_setr_epi32(
16312    e15: i32,
16313    e14: i32,
16314    e13: i32,
16315    e12: i32,
16316    e11: i32,
16317    e10: i32,
16318    e9: i32,
16319    e8: i32,
16320    e7: i32,
16321    e6: i32,
16322    e5: i32,
16323    e4: i32,
16324    e3: i32,
16325    e2: i32,
16326    e1: i32,
16327    e0: i32,
16328) -> __m512i {
16329    unsafe {
16330        let r = i32x16::new(
16331            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
16332        );
16333        transmute(r)
16334    }
16335}
16336
16337/// Set packed 8-bit integers in dst with the supplied values.
16338///
16339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16340#[inline]
16341#[target_feature(enable = "avx512f")]
16342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16343pub fn _mm512_set_epi8(
16344    e63: i8,
16345    e62: i8,
16346    e61: i8,
16347    e60: i8,
16348    e59: i8,
16349    e58: i8,
16350    e57: i8,
16351    e56: i8,
16352    e55: i8,
16353    e54: i8,
16354    e53: i8,
16355    e52: i8,
16356    e51: i8,
16357    e50: i8,
16358    e49: i8,
16359    e48: i8,
16360    e47: i8,
16361    e46: i8,
16362    e45: i8,
16363    e44: i8,
16364    e43: i8,
16365    e42: i8,
16366    e41: i8,
16367    e40: i8,
16368    e39: i8,
16369    e38: i8,
16370    e37: i8,
16371    e36: i8,
16372    e35: i8,
16373    e34: i8,
16374    e33: i8,
16375    e32: i8,
16376    e31: i8,
16377    e30: i8,
16378    e29: i8,
16379    e28: i8,
16380    e27: i8,
16381    e26: i8,
16382    e25: i8,
16383    e24: i8,
16384    e23: i8,
16385    e22: i8,
16386    e21: i8,
16387    e20: i8,
16388    e19: i8,
16389    e18: i8,
16390    e17: i8,
16391    e16: i8,
16392    e15: i8,
16393    e14: i8,
16394    e13: i8,
16395    e12: i8,
16396    e11: i8,
16397    e10: i8,
16398    e9: i8,
16399    e8: i8,
16400    e7: i8,
16401    e6: i8,
16402    e5: i8,
16403    e4: i8,
16404    e3: i8,
16405    e2: i8,
16406    e1: i8,
16407    e0: i8,
16408) -> __m512i {
16409    unsafe {
16410        let r = i8x64::new(
16411            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16412            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
16413            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
16414            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
16415        );
16416        transmute(r)
16417    }
16418}
16419
16420/// Set packed 16-bit integers in dst with the supplied values.
16421///
16422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16423#[inline]
16424#[target_feature(enable = "avx512f")]
16425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16426pub fn _mm512_set_epi16(
16427    e31: i16,
16428    e30: i16,
16429    e29: i16,
16430    e28: i16,
16431    e27: i16,
16432    e26: i16,
16433    e25: i16,
16434    e24: i16,
16435    e23: i16,
16436    e22: i16,
16437    e21: i16,
16438    e20: i16,
16439    e19: i16,
16440    e18: i16,
16441    e17: i16,
16442    e16: i16,
16443    e15: i16,
16444    e14: i16,
16445    e13: i16,
16446    e12: i16,
16447    e11: i16,
16448    e10: i16,
16449    e9: i16,
16450    e8: i16,
16451    e7: i16,
16452    e6: i16,
16453    e5: i16,
16454    e4: i16,
16455    e3: i16,
16456    e2: i16,
16457    e1: i16,
16458    e0: i16,
16459) -> __m512i {
16460    unsafe {
16461        let r = i16x32::new(
16462            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16463            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
16464        );
16465        transmute(r)
16466    }
16467}
16468
16469/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16470///
16471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
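///
/// A minimal sketch of the repetition pattern, assuming runtime AVX-512F detection
/// (the last argument `a` ends up in the lowest lane of each group of four):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let v = _mm512_set4_epi32(4, 3, 2, 1);
///         let lanes: [i32; 16] = core::mem::transmute(v);
///         assert_eq!(lanes, [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]);
///     }
/// }
/// # }
/// ```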
16472#[inline]
16473#[target_feature(enable = "avx512f")]
16474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16475pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16476    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16477}
16478
16479/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16480///
16481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16482#[inline]
16483#[target_feature(enable = "avx512f")]
16484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16485pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16486    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16487}
16488
16489/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16490///
16491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16492#[inline]
16493#[target_feature(enable = "avx512f")]
16494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16495pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16496    _mm512_set_pd(d, c, b, a, d, c, b, a)
16497}
16498
16499/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16500///
16501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16502#[inline]
16503#[target_feature(enable = "avx512f")]
16504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16505pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16506    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16507}
16508
16509/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16510///
16511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16512#[inline]
16513#[target_feature(enable = "avx512f")]
16514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16515pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16516    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16517}
16518
16519/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16520///
16521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16522#[inline]
16523#[target_feature(enable = "avx512f")]
16524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16525pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16526    _mm512_set_pd(a, b, c, d, a, b, c, d)
16527}
16528
16529/// Set packed 64-bit integers in dst with the supplied values.
16530///
16531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
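///
/// A minimal sketch of the lane ordering, assuming runtime AVX-512F detection
/// (the first argument lands in the highest lane):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let v = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
///         let lanes: [i64; 8] = core::mem::transmute(v);
///         assert_eq!(lanes, [0, 1, 2, 3, 4, 5, 6, 7]);
///     }
/// }
/// # }
/// ```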
16532#[inline]
16533#[target_feature(enable = "avx512f")]
16534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16535pub fn _mm512_set_epi64(
16536    e0: i64,
16537    e1: i64,
16538    e2: i64,
16539    e3: i64,
16540    e4: i64,
16541    e5: i64,
16542    e6: i64,
16543    e7: i64,
16544) -> __m512i {
16545    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16546}
16547
16548/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16549///
16550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16551#[inline]
16552#[target_feature(enable = "avx512f")]
16553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16554pub fn _mm512_setr_epi64(
16555    e0: i64,
16556    e1: i64,
16557    e2: i64,
16558    e3: i64,
16559    e4: i64,
16560    e5: i64,
16561    e6: i64,
16562    e7: i64,
16563) -> __m512i {
16564    unsafe {
16565        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
16566        transmute(r)
16567    }
16568}
16569
16570/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16571///
16572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
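///
/// A minimal usage sketch, assuming runtime AVX-512F detection; `SCALE = 8` turns
/// the 32-bit indices into byte offsets for `f64` elements:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let data = [0.0_f64, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
///         // Gather the elements in reverse order.
///         let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
///         let r = _mm512_i32gather_pd::<8>(idx, data.as_ptr());
///         let lanes: [f64; 8] = core::mem::transmute(r);
///         assert_eq!(lanes, [7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0]);
///     }
/// }
/// # }
/// ```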
16573#[inline]
16574#[target_feature(enable = "avx512f")]
16575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16576#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16577#[rustc_legacy_const_generics(2)]
16578pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
16579    offsets: __m256i,
16580    slice: *const f64,
16581) -> __m512d {
16582    static_assert_imm8_scale!(SCALE);
16583    let zero = f64x8::ZERO;
16584    let neg_one = -1;
16585    let slice = slice as *const i8;
16586    let offsets = offsets.as_i32x8();
16587    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
16588    transmute(r)
16589}
16590
16591/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16592///
16593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
16594#[inline]
16595#[target_feature(enable = "avx512f")]
16596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16597#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16598#[rustc_legacy_const_generics(4)]
16599pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16600    src: __m512d,
16601    mask: __mmask8,
16602    offsets: __m256i,
16603    slice: *const f64,
16604) -> __m512d {
16605    static_assert_imm8_scale!(SCALE);
16606    let src = src.as_f64x8();
16607    let slice = slice as *const i8;
16608    let offsets = offsets.as_i32x8();
16609    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
16610    transmute(r)
16611}
16612
16613/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16614///
16615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16616#[inline]
16617#[target_feature(enable = "avx512f")]
16618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16619#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16620#[rustc_legacy_const_generics(2)]
16621pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
16622    offsets: __m512i,
16623    slice: *const f64,
16624) -> __m512d {
16625    static_assert_imm8_scale!(SCALE);
16626    let zero = f64x8::ZERO;
16627    let neg_one = -1;
16628    let slice = slice as *const i8;
16629    let offsets = offsets.as_i64x8();
16630    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
16631    transmute(r)
16632}
16633
16634/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16635///
16636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16637#[inline]
16638#[target_feature(enable = "avx512f")]
16639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16640#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16641#[rustc_legacy_const_generics(4)]
16642pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16643    src: __m512d,
16644    mask: __mmask8,
16645    offsets: __m512i,
16646    slice: *const f64,
16647) -> __m512d {
16648    static_assert_imm8_scale!(SCALE);
16649    let src = src.as_f64x8();
16650    let slice = slice as *const i8;
16651    let offsets = offsets.as_i64x8();
16652    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
16653    transmute(r)
16654}
16655
16656/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16657///
16658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16659#[inline]
16660#[target_feature(enable = "avx512f")]
16661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16662#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16663#[rustc_legacy_const_generics(2)]
16664pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
16665    static_assert_imm8_scale!(SCALE);
16666    let zero = f32x8::ZERO;
16667    let neg_one = -1;
16668    let slice = slice as *const i8;
16669    let offsets = offsets.as_i64x8();
16670    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
16671    transmute(r)
16672}
16673
16674/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16675///
16676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16677#[inline]
16678#[target_feature(enable = "avx512f")]
16679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16680#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16681#[rustc_legacy_const_generics(4)]
16682pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16683    src: __m256,
16684    mask: __mmask8,
16685    offsets: __m512i,
16686    slice: *const f32,
16687) -> __m256 {
16688    static_assert_imm8_scale!(SCALE);
16689    let src = src.as_f32x8();
16690    let slice = slice as *const i8;
16691    let offsets = offsets.as_i64x8();
16692    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
16693    transmute(r)
16694}
16695
16696/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16697///
16698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16699#[inline]
16700#[target_feature(enable = "avx512f")]
16701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16702#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16703#[rustc_legacy_const_generics(2)]
16704pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
16705    static_assert_imm8_scale!(SCALE);
16706    let zero = f32x16::ZERO;
16707    let neg_one = -1;
16708    let slice = slice as *const i8;
16709    let offsets = offsets.as_i32x16();
16710    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
16711    transmute(r)
16712}
16713
16714/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16715///
16716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
16717#[inline]
16718#[target_feature(enable = "avx512f")]
16719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16720#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16721#[rustc_legacy_const_generics(4)]
16722pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16723    src: __m512,
16724    mask: __mmask16,
16725    offsets: __m512i,
16726    slice: *const f32,
16727) -> __m512 {
16728    static_assert_imm8_scale!(SCALE);
16729    let src = src.as_f32x16();
16730    let slice = slice as *const i8;
16731    let offsets = offsets.as_i32x16();
16732    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
16733    transmute(r)
16734}
16735
16736/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16737///
16738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
16739#[inline]
16740#[target_feature(enable = "avx512f")]
16741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16742#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16743#[rustc_legacy_const_generics(2)]
16744pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16745    offsets: __m512i,
16746    slice: *const i32,
16747) -> __m512i {
16748    static_assert_imm8_scale!(SCALE);
16749    let zero = i32x16::ZERO;
16750    let neg_one = -1;
16751    let slice = slice as *const i8;
16752    let offsets = offsets.as_i32x16();
16753    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
16754    transmute(r)
16755}
16756
16757/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16758///
16759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
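///
/// A minimal sketch of the masked gather, assuming runtime AVX-512F detection;
/// lanes whose mask bit is clear keep the corresponding element of `src`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let data: [i32; 16] = [0, 10, 20, 30, 40, 50, 60, 70,
///                                80, 90, 100, 110, 120, 130, 140, 150];
///         let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///         let src = _mm512_set1_epi32(-1);
///         // SCALE = 4: the indices address whole `i32` elements.
///         let r = _mm512_mask_i32gather_epi32::<4>(src, 0b00000000_11111111, idx, data.as_ptr());
///         let lanes: [i32; 16] = core::mem::transmute(r);
///         assert_eq!(&lanes[..8], &[0, 10, 20, 30, 40, 50, 60, 70]);
///         assert_eq!(&lanes[8..], &[-1; 8]);
///     }
/// }
/// # }
/// ```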
16760#[inline]
16761#[target_feature(enable = "avx512f")]
16762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16763#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16764#[rustc_legacy_const_generics(4)]
16765pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16766    src: __m512i,
16767    mask: __mmask16,
16768    offsets: __m512i,
16769    slice: *const i32,
16770) -> __m512i {
16771    static_assert_imm8_scale!(SCALE);
16772    let src = src.as_i32x16();
16773    let mask = mask as i16;
16774    let slice = slice as *const i8;
16775    let offsets = offsets.as_i32x16();
16776    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
16777    transmute(r)
16778}
16779
16780/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16781///
16782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16783#[inline]
16784#[target_feature(enable = "avx512f")]
16785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16786#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16787#[rustc_legacy_const_generics(2)]
16788pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16789    offsets: __m256i,
16790    slice: *const i64,
16791) -> __m512i {
16792    static_assert_imm8_scale!(SCALE);
16793    let zero = i64x8::ZERO;
16794    let neg_one = -1;
16795    let slice = slice as *const i8;
16796    let offsets = offsets.as_i32x8();
16797    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
16798    transmute(r)
16799}
16800
16801/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16802///
16803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16804#[inline]
16805#[target_feature(enable = "avx512f")]
16806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16807#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16808#[rustc_legacy_const_generics(4)]
16809pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16810    src: __m512i,
16811    mask: __mmask8,
16812    offsets: __m256i,
16813    slice: *const i64,
16814) -> __m512i {
16815    static_assert_imm8_scale!(SCALE);
16816    let src = src.as_i64x8();
16817    let mask = mask as i8;
16818    let slice = slice as *const i8;
16819    let offsets = offsets.as_i32x8();
16820    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
16821    transmute(r)
16822}
16823
16824/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16825///
16826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16827#[inline]
16828#[target_feature(enable = "avx512f")]
16829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16830#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16831#[rustc_legacy_const_generics(2)]
16832pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16833    offsets: __m512i,
16834    slice: *const i64,
16835) -> __m512i {
16836    static_assert_imm8_scale!(SCALE);
16837    let zero = i64x8::ZERO;
16838    let neg_one = -1;
16839    let slice = slice as *const i8;
16840    let offsets = offsets.as_i64x8();
16841    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
16842    transmute(r)
16843}
16844
16845/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16846///
16847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16848#[inline]
16849#[target_feature(enable = "avx512f")]
16850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16851#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16852#[rustc_legacy_const_generics(4)]
16853pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16854    src: __m512i,
16855    mask: __mmask8,
16856    offsets: __m512i,
16857    slice: *const i64,
16858) -> __m512i {
16859    static_assert_imm8_scale!(SCALE);
16860    let src = src.as_i64x8();
16861    let mask = mask as i8;
16862    let slice = slice as *const i8;
16863    let offsets = offsets.as_i64x8();
16864    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
16865    transmute(r)
16866}
16867
16868/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16869///
16870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16871#[inline]
16872#[target_feature(enable = "avx512f")]
16873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16874#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16875#[rustc_legacy_const_generics(2)]
16876pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16877    offsets: __m512i,
16878    slice: *const i32,
16879) -> __m256i {
16880    static_assert_imm8_scale!(SCALE);
16881    let zeros = i32x8::ZERO;
16882    let neg_one = -1;
16883    let slice = slice as *const i8;
16884    let offsets = offsets.as_i64x8();
16885    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
16886    transmute(r)
16887}
16888
16889/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16890///
16891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16892#[inline]
16893#[target_feature(enable = "avx512f")]
16894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16895#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16896#[rustc_legacy_const_generics(4)]
16897pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16898    src: __m256i,
16899    mask: __mmask8,
16900    offsets: __m512i,
16901    slice: *const i32,
16902) -> __m256i {
16903    static_assert_imm8_scale!(SCALE);
16904    let src = src.as_i32x8();
16905    let mask = mask as i8;
16906    let slice = slice as *const i8;
16907    let offsets = offsets.as_i64x8();
16908    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
16909    transmute(r)
16910}
16911
16912/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16913///
16914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
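///
/// A minimal usage sketch, assuming runtime AVX-512F detection; `SCALE = 8`
/// addresses whole `f64` elements:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let mut out = [0.0_f64; 8];
///         let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///         let src = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
///         _mm512_i32scatter_pd::<8>(out.as_mut_ptr(), idx, src);
///         assert_eq!(out, [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0]);
///     }
/// }
/// # }
/// ```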
16915#[inline]
16916#[target_feature(enable = "avx512f")]
16917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16918#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16919#[rustc_legacy_const_generics(3)]
16920pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16921    slice: *mut f64,
16922    offsets: __m256i,
16923    src: __m512d,
16924) {
16925    static_assert_imm8_scale!(SCALE);
16926    let src = src.as_f64x8();
16927    let neg_one = -1;
16928    let slice = slice as *mut i8;
16929    let offsets = offsets.as_i32x8();
16930    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16931}
16932
16933/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16934///
16935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
16936#[inline]
16937#[target_feature(enable = "avx512f")]
16938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16939#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16940#[rustc_legacy_const_generics(4)]
16941pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16942    slice: *mut f64,
16943    mask: __mmask8,
16944    offsets: __m256i,
16945    src: __m512d,
16946) {
16947    static_assert_imm8_scale!(SCALE);
16948    let src = src.as_f64x8();
16949    let slice = slice as *mut i8;
16950    let offsets = offsets.as_i32x8();
16951    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16952}
16953
16954/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16955///
16956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16957#[inline]
16958#[target_feature(enable = "avx512f")]
16959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16960#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16961#[rustc_legacy_const_generics(3)]
16962pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16963    slice: *mut f64,
16964    offsets: __m512i,
16965    src: __m512d,
16966) {
16967    static_assert_imm8_scale!(SCALE);
16968    let src = src.as_f64x8();
16969    let neg_one = -1;
16970    let slice = slice as *mut i8;
16971    let offsets = offsets.as_i64x8();
16972    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16973}
16974
16975/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16976///
16977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16978#[inline]
16979#[target_feature(enable = "avx512f")]
16980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16981#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16982#[rustc_legacy_const_generics(4)]
16983pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16984    slice: *mut f64,
16985    mask: __mmask8,
16986    offsets: __m512i,
16987    src: __m512d,
16988) {
16989    static_assert_imm8_scale!(SCALE);
16990    let src = src.as_f64x8();
16991    let slice = slice as *mut i8;
16992    let offsets = offsets.as_i64x8();
16993    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16994}
16995
16996/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16997///
16998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16999#[inline]
17000#[target_feature(enable = "avx512f")]
17001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17002#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17003#[rustc_legacy_const_generics(3)]
17004pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
17005    slice: *mut f32,
17006    offsets: __m512i,
17007    src: __m512,
17008) {
17009    static_assert_imm8_scale!(SCALE);
17010    let src = src.as_f32x16();
17011    let neg_one = -1;
17012    let slice = slice as *mut i8;
17013    let offsets = offsets.as_i32x16();
17014    vscatterdps(slice, neg_one, offsets, src, SCALE);
17015}
17016
17017/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17018///
17019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
17020#[inline]
17021#[target_feature(enable = "avx512f")]
17022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17023#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17024#[rustc_legacy_const_generics(4)]
17025pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
17026    slice: *mut f32,
17027    mask: __mmask16,
17028    offsets: __m512i,
17029    src: __m512,
17030) {
17031    static_assert_imm8_scale!(SCALE);
17032    let src = src.as_f32x16();
17033    let slice = slice as *mut i8;
17034    let offsets = offsets.as_i32x16();
17035    vscatterdps(slice, mask as i16, offsets, src, SCALE);
17036}
17037
17038/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17039///
17040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
17041#[inline]
17042#[target_feature(enable = "avx512f")]
17043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17044#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17045#[rustc_legacy_const_generics(3)]
17046pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
17047    slice: *mut f32,
17048    offsets: __m512i,
17049    src: __m256,
17050) {
17051    static_assert_imm8_scale!(SCALE);
17052    let src = src.as_f32x8();
17053    let neg_one = -1;
17054    let slice = slice as *mut i8;
17055    let offsets = offsets.as_i64x8();
17056    vscatterqps(slice, neg_one, offsets, src, SCALE);
17057}
17058
17059/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17060///
17061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
17062#[inline]
17063#[target_feature(enable = "avx512f")]
17064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17065#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17066#[rustc_legacy_const_generics(4)]
17067pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17068    slice: *mut f32,
17069    mask: __mmask8,
17070    offsets: __m512i,
17071    src: __m256,
17072) {
17073    static_assert_imm8_scale!(SCALE);
17074    let src = src.as_f32x8();
17075    let slice = slice as *mut i8;
17076    let offsets = offsets.as_i64x8();
17077    vscatterqps(slice, mask as i8, offsets, src, SCALE);
17078}
17079
17080/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17081///
17082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17083#[inline]
17084#[target_feature(enable = "avx512f")]
17085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17086#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17087#[rustc_legacy_const_generics(3)]
17088pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17089    slice: *mut i64,
17090    offsets: __m256i,
17091    src: __m512i,
17092) {
17093    static_assert_imm8_scale!(SCALE);
17094    let src = src.as_i64x8();
17095    let neg_one = -1;
17096    let slice = slice as *mut i8;
17097    let offsets = offsets.as_i32x8();
17098    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17099}
17100
17101/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17102///
17103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17104#[inline]
17105#[target_feature(enable = "avx512f")]
17106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17107#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17108#[rustc_legacy_const_generics(4)]
17109pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17110    slice: *mut i64,
17111    mask: __mmask8,
17112    offsets: __m256i,
17113    src: __m512i,
17114) {
17115    static_assert_imm8_scale!(SCALE);
17116    let src = src.as_i64x8();
17117    let mask = mask as i8;
17118    let slice = slice as *mut i8;
17119    let offsets = offsets.as_i32x8();
17120    vpscatterdq(slice, mask, offsets, src, SCALE);
17121}
17122
17123/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17124///
17125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17126#[inline]
17127#[target_feature(enable = "avx512f")]
17128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17129#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17130#[rustc_legacy_const_generics(3)]
17131pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17132    slice: *mut i64,
17133    offsets: __m512i,
17134    src: __m512i,
17135) {
17136    static_assert_imm8_scale!(SCALE);
17137    let src = src.as_i64x8();
17138    let neg_one = -1;
17139    let slice = slice as *mut i8;
17140    let offsets = offsets.as_i64x8();
17141    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17142}
17143
17144/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17145///
17146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17147#[inline]
17148#[target_feature(enable = "avx512f")]
17149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17150#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17151#[rustc_legacy_const_generics(4)]
17152pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17153    slice: *mut i64,
17154    mask: __mmask8,
17155    offsets: __m512i,
17156    src: __m512i,
17157) {
17158    static_assert_imm8_scale!(SCALE);
17159    let src = src.as_i64x8();
17160    let mask = mask as i8;
17161    let slice = slice as *mut i8;
17162    let offsets = offsets.as_i64x8();
17163    vpscatterqq(slice, mask, offsets, src, SCALE);
17164}
17165
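// Hypothetical usage sketch (illustrative only): with 64-bit indices a single
// `__m512i` holds only eight offsets, but each one can span the full pointer
// range. `SCALE = 8` makes every index an element offset into an `i64` buffer;
// only the lanes selected by the mask are written.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_i64scatter_epi64(buf: &mut [i64; 8]) {
    unsafe {
        let idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
        let a = _mm512_set1_epi64(-1);
        // Mask 0b0101_0101 stores lanes 0, 2, 4 and 6, i.e. buf[7], buf[5], buf[3], buf[1].
        _mm512_mask_i64scatter_epi64::<8>(buf.as_mut_ptr(), 0b0101_0101, idx, a);
    }
}
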
17166/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17167///
17168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
17169#[inline]
17170#[target_feature(enable = "avx512f")]
17171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17172#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17173#[rustc_legacy_const_generics(3)]
17174pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17175    slice: *mut i32,
17176    offsets: __m512i,
17177    src: __m512i,
17178) {
17179    static_assert_imm8_scale!(SCALE);
17180    let src = src.as_i32x16();
17181    let neg_one = -1;
17182    let slice = slice as *mut i8;
17183    let offsets = offsets.as_i32x16();
17184    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17185}
17186
17187/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17188///
17189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17190#[inline]
17191#[target_feature(enable = "avx512f")]
17192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17193#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17194#[rustc_legacy_const_generics(4)]
17195pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17196    slice: *mut i32,
17197    mask: __mmask16,
17198    offsets: __m512i,
17199    src: __m512i,
17200) {
17201    static_assert_imm8_scale!(SCALE);
17202    let src = src.as_i32x16();
17203    let mask = mask as i16;
17204    let slice = slice as *mut i8;
17205    let offsets = offsets.as_i32x16();
17206    vpscatterdd(slice, mask, offsets, src, SCALE);
17207}
17208
17209/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17210///
17211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17212#[inline]
17213#[target_feature(enable = "avx512f")]
17214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17215#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17216#[rustc_legacy_const_generics(3)]
17217pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17218    slice: *mut i32,
17219    offsets: __m512i,
17220    src: __m256i,
17221) {
17222    static_assert_imm8_scale!(SCALE);
17223    let src = src.as_i32x8();
17224    let neg_one = -1;
17225    let slice = slice as *mut i8;
17226    let offsets = offsets.as_i64x8();
17227    vpscatterqd(slice, neg_one, offsets, src, SCALE);
17228}
17229
17230/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17231///
17232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17233#[inline]
17234#[target_feature(enable = "avx512f")]
17235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17236#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17237#[rustc_legacy_const_generics(4)]
17238pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17239    slice: *mut i32,
17240    mask: __mmask8,
17241    offsets: __m512i,
17242    src: __m256i,
17243) {
17244    static_assert_imm8_scale!(SCALE);
17245    let src = src.as_i32x8();
17246    let mask = mask as i8;
17247    let slice = slice as *mut i8;
17248    let offsets = offsets.as_i64x8();
17249    vpscatterqd(slice, mask, offsets, src, SCALE);
17250}
17251
17252/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17253/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17254///
17255/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
17256#[inline]
17257#[target_feature(enable = "avx512f")]
17258#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17259#[rustc_legacy_const_generics(2)]
17260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17261pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17262    vindex: __m512i,
17263    base_addr: *const i64,
17264) -> __m512i {
17265    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17266}
17267
17268/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17269/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17270/// (elements are copied from src when the corresponding mask bit is not set).
17271///
17272/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17273#[inline]
17274#[target_feature(enable = "avx512f")]
17275#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17276#[rustc_legacy_const_generics(4)]
17277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17278pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17279    src: __m512i,
17280    k: __mmask8,
17281    vindex: __m512i,
17282    base_addr: *const i64,
17283) -> __m512i {
17284    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17285}
17286
17287/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17288/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17289///
17290/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17291#[inline]
17292#[target_feature(enable = "avx512f")]
17293#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17294#[rustc_legacy_const_generics(2)]
17295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17296pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17297    vindex: __m512i,
17298    base_addr: *const f64,
17299) -> __m512d {
17300    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17301}
17302
17303/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17304/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17305/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17306///
17307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17308#[inline]
17309#[target_feature(enable = "avx512f")]
17310#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17311#[rustc_legacy_const_generics(4)]
17312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17313pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17314    src: __m512d,
17315    k: __mmask8,
17316    vindex: __m512i,
17317    base_addr: *const f64,
17318) -> __m512d {
17319    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17320}
17321
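// Hypothetical usage sketch (illustrative only): the `i32lo*` forms accept a
// full `__m512i` of indices but read only its lower eight 32-bit lanes, which
// is why they forward to the 256-bit-index gather through
// `_mm512_castsi512_si256`. The upper lanes are ignored entirely.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_i32logather_epi64(table: &[i64; 16]) -> __m512i {
    unsafe {
        // Only the first eight indices matter; the 99s in the upper half are never used.
        let vindex = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 99, 99, 99, 99, 99, 99, 99, 99);
        _mm512_i32logather_epi64::<8>(vindex, table.as_ptr())
    }
}
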
17322/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17323/// indices stored in the lower half of vindex scaled by scale.
17324///
17325/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
17326#[inline]
17327#[target_feature(enable = "avx512f")]
17328#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17329#[rustc_legacy_const_generics(3)]
17330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17331pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17332    base_addr: *mut i64,
17333    vindex: __m512i,
17334    a: __m512i,
17335) {
17336    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17337}
17338
17339/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17340/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17341/// mask bit is not set are not written to memory).
17342///
17343/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17344#[inline]
17345#[target_feature(enable = "avx512f")]
17346#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17347#[rustc_legacy_const_generics(4)]
17348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17349pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17350    base_addr: *mut i64,
17351    k: __mmask8,
17352    vindex: __m512i,
17353    a: __m512i,
17354) {
17355    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17356}
17357
17358/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17359/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17360///
17361/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17362#[inline]
17363#[target_feature(enable = "avx512f")]
17364#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17365#[rustc_legacy_const_generics(3)]
17366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17367pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17368    base_addr: *mut f64,
17369    vindex: __m512i,
17370    a: __m512d,
17371) {
17372    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17373}
17374
17375/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17376/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17377/// (elements whose corresponding mask bit is not set are not written to memory).
17378///
17379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17380#[inline]
17381#[target_feature(enable = "avx512f")]
17382#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17383#[rustc_legacy_const_generics(4)]
17384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17385pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17386    base_addr: *mut f64,
17387    k: __mmask8,
17388    vindex: __m512i,
17389    a: __m512d,
17390) {
17391    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17392}
17393
17394/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17395/// indices stored in vindex scaled by scale
17396///
17397/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17398#[inline]
17399#[target_feature(enable = "avx512f,avx512vl")]
17400#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17401#[rustc_legacy_const_generics(3)]
17402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17403pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17404    base_addr: *mut i32,
17405    vindex: __m256i,
17406    a: __m256i,
17407) {
17408    static_assert_imm8_scale!(SCALE);
17409    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17410}
17411
17412/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17413/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17414/// are not written to memory).
17415///
17416/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17417#[inline]
17418#[target_feature(enable = "avx512f,avx512vl")]
17419#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17420#[rustc_legacy_const_generics(4)]
17421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17422pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17423    base_addr: *mut i32,
17424    k: __mmask8,
17425    vindex: __m256i,
17426    a: __m256i,
17427) {
17428    static_assert_imm8_scale!(SCALE);
17429    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17430}
17431
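// Hypothetical usage sketch (illustrative only): the 256-bit scatters require
// AVX-512VL in addition to AVX-512F. Only the lanes enabled in the mask are
// written; here the low four bits select lanes 0..4.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm256_mask_i32scatter_epi32(buf: &mut [i32; 8]) {
    unsafe {
        let vindex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let a = _mm256_set1_epi32(7);
        _mm256_mask_i32scatter_epi32::<4>(buf.as_mut_ptr(), 0b0000_1111, vindex, a);
    }
}
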
17432/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17433///
17434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17435#[inline]
17436#[target_feature(enable = "avx512f,avx512vl")]
17437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17438#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17439#[rustc_legacy_const_generics(3)]
17440pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17441    slice: *mut i64,
17442    offsets: __m128i,
17443    src: __m256i,
17444) {
17445    static_assert_imm8_scale!(SCALE);
17446    let src = src.as_i64x4();
17447    let slice = slice as *mut i8;
17448    let offsets = offsets.as_i32x4();
17449    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
17450}
17451
17452/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17453/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17454/// are not written to memory).
17455///
17456/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17457#[inline]
17458#[target_feature(enable = "avx512f,avx512vl")]
17459#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17460#[rustc_legacy_const_generics(4)]
17461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17462pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17463    base_addr: *mut i64,
17464    k: __mmask8,
17465    vindex: __m128i,
17466    a: __m256i,
17467) {
17468    static_assert_imm8_scale!(SCALE);
17469    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
17470}
17471
17472/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17473/// at packed 32-bit integer indices stored in vindex scaled by scale
17474///
17475/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17476#[inline]
17477#[target_feature(enable = "avx512f,avx512vl")]
17478#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17479#[rustc_legacy_const_generics(3)]
17480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17481pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17482    base_addr: *mut f64,
17483    vindex: __m128i,
17484    a: __m256d,
17485) {
17486    static_assert_imm8_scale!(SCALE);
17487    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17488}
17489
17490/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17491/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17492/// mask bit is not set are not written to memory).
17493///
17494/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17495#[inline]
17496#[target_feature(enable = "avx512f,avx512vl")]
17497#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17498#[rustc_legacy_const_generics(4)]
17499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17500pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17501    base_addr: *mut f64,
17502    k: __mmask8,
17503    vindex: __m128i,
17504    a: __m256d,
17505) {
17506    static_assert_imm8_scale!(SCALE);
17507    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17508}
17509
17510/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17511/// at packed 32-bit integer indices stored in vindex scaled by scale
17512///
17513/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17514#[inline]
17515#[target_feature(enable = "avx512f,avx512vl")]
17516#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17517#[rustc_legacy_const_generics(3)]
17518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17519pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17520    base_addr: *mut f32,
17521    vindex: __m256i,
17522    a: __m256,
17523) {
17524    static_assert_imm8_scale!(SCALE);
17525    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17526}
17527
17528/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17529/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17530/// mask bit is not set are not written to memory).
17531///
17532/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17533#[inline]
17534#[target_feature(enable = "avx512f,avx512vl")]
17535#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17536#[rustc_legacy_const_generics(4)]
17537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17538pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17539    base_addr: *mut f32,
17540    k: __mmask8,
17541    vindex: __m256i,
17542    a: __m256,
17543) {
17544    static_assert_imm8_scale!(SCALE);
17545    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17546}
17547
17548/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17549/// indices stored in vindex scaled by scale
17550///
17551/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17552#[inline]
17553#[target_feature(enable = "avx512f,avx512vl")]
17554#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17555#[rustc_legacy_const_generics(3)]
17556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17557pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17558    base_addr: *mut i32,
17559    vindex: __m256i,
17560    a: __m128i,
17561) {
17562    static_assert_imm8_scale!(SCALE);
17563    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17564}
17565
17566/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17567/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17568/// are not written to memory).
17569///
17570/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17571#[inline]
17572#[target_feature(enable = "avx512f,avx512vl")]
17573#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17574#[rustc_legacy_const_generics(4)]
17575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17576pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17577    base_addr: *mut i32,
17578    k: __mmask8,
17579    vindex: __m256i,
17580    a: __m128i,
17581) {
17582    static_assert_imm8_scale!(SCALE);
17583    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17584}
17585
17586/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17587/// indices stored in vindex scaled by scale
17588///
17589/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17590#[inline]
17591#[target_feature(enable = "avx512f,avx512vl")]
17592#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17593#[rustc_legacy_const_generics(3)]
17594#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17595pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
17596    base_addr: *mut i64,
17597    vindex: __m256i,
17598    a: __m256i,
17599) {
17600    static_assert_imm8_scale!(SCALE);
17601    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17602}
17603
17604/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17605/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17606/// are not written to memory).
17607///
17608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
17609#[inline]
17610#[target_feature(enable = "avx512f,avx512vl")]
17611#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17612#[rustc_legacy_const_generics(4)]
17613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17614pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
17615    base_addr: *mut i64,
17616    k: __mmask8,
17617    vindex: __m256i,
17618    a: __m256i,
17619) {
17620    static_assert_imm8_scale!(SCALE);
17621    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17622}
17623
17624/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17625/// at packed 64-bit integer indices stored in vindex scaled by scale
17626///
17627/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
17628#[inline]
17629#[target_feature(enable = "avx512f,avx512vl")]
17630#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17631#[rustc_legacy_const_generics(3)]
17632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17633pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
17634    base_addr: *mut f64,
17635    vindex: __m256i,
17636    a: __m256d,
17637) {
17638    static_assert_imm8_scale!(SCALE);
17639    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17640}
17641
17642/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17643/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17644/// mask bit is not set are not written to memory).
17645///
17646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
17647#[inline]
17648#[target_feature(enable = "avx512f,avx512vl")]
17649#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17650#[rustc_legacy_const_generics(4)]
17651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17652pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
17653    base_addr: *mut f64,
17654    k: __mmask8,
17655    vindex: __m256i,
17656    a: __m256d,
17657) {
17658    static_assert_imm8_scale!(SCALE);
17659    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17660}
17661
17662/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17663/// at packed 64-bit integer indices stored in vindex scaled by scale
17664///
17665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
17666#[inline]
17667#[target_feature(enable = "avx512f,avx512vl")]
17668#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17669#[rustc_legacy_const_generics(3)]
17670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17671pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
17672    base_addr: *mut f32,
17673    vindex: __m256i,
17674    a: __m128,
17675) {
17676    static_assert_imm8_scale!(SCALE);
17677    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17678}
17679
17680/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17681/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17682/// mask bit is not set are not written to memory).
17683///
17684/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
17685#[inline]
17686#[target_feature(enable = "avx512f,avx512vl")]
17687#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17688#[rustc_legacy_const_generics(4)]
17689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17690pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
17691    base_addr: *mut f32,
17692    k: __mmask8,
17693    vindex: __m256i,
17694    a: __m128,
17695) {
17696    static_assert_imm8_scale!(SCALE);
17697    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17698}
17699
17700/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17701/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17702/// mask bit is not set).
17703///
17704/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
17705#[inline]
17706#[target_feature(enable = "avx512f,avx512vl")]
17707#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17708#[rustc_legacy_const_generics(4)]
17709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17710pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
17711    src: __m256i,
17712    k: __mmask8,
17713    vindex: __m256i,
17714    base_addr: *const i32,
17715) -> __m256i {
17716    static_assert_imm8_scale!(SCALE);
17717    transmute(vpgatherdd_256(
17718        src.as_i32x8(),
17719        base_addr as _,
17720        vindex.as_i32x8(),
17721        k,
17722        SCALE,
17723    ))
17724}
17725
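// Hypothetical usage sketch (illustrative only): a masked gather merges the
// loaded lanes with `src`. Lanes whose mask bit is clear keep the value from
// `src` and their memory is never read, so indices in disabled lanes do not
// have to be valid.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm256_mmask_i32gather_epi32(table: &[i32; 8]) -> __m256i {
    unsafe {
        let src = _mm256_set1_epi32(-1); // fallback for masked-off lanes
        let vindex = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        // Gather only the low four lanes; the high four stay -1.
        _mm256_mmask_i32gather_epi32::<4>(src, 0b0000_1111, vindex, table.as_ptr())
    }
}
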
17726/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17727/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17728/// mask bit is not set).
17729///
17730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
17731#[inline]
17732#[target_feature(enable = "avx512f,avx512vl")]
17733#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17734#[rustc_legacy_const_generics(4)]
17735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17736pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
17737    src: __m256i,
17738    k: __mmask8,
17739    vindex: __m128i,
17740    base_addr: *const i64,
17741) -> __m256i {
17742    static_assert_imm8_scale!(SCALE);
17743    transmute(vpgatherdq_256(
17744        src.as_i64x4(),
17745        base_addr as _,
17746        vindex.as_i32x4(),
17747        k,
17748        SCALE,
17749    ))
17750}
17751
17752/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17753/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17754/// from src when the corresponding mask bit is not set).
17755///
17756/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
17757#[inline]
17758#[target_feature(enable = "avx512f,avx512vl")]
17759#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17760#[rustc_legacy_const_generics(4)]
17761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17762pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
17763    src: __m256d,
17764    k: __mmask8,
17765    vindex: __m128i,
17766    base_addr: *const f64,
17767) -> __m256d {
17768    static_assert_imm8_scale!(SCALE);
17769    transmute(vgatherdpd_256(
17770        src.as_f64x4(),
17771        base_addr as _,
17772        vindex.as_i32x4(),
17773        k,
17774        SCALE,
17775    ))
17776}
17777
17778/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17779/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17780/// from src when the corresponding mask bit is not set).
17781///
17782/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
17783#[inline]
17784#[target_feature(enable = "avx512f,avx512vl")]
17785#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17786#[rustc_legacy_const_generics(4)]
17787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17788pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
17789    src: __m256,
17790    k: __mmask8,
17791    vindex: __m256i,
17792    base_addr: *const f32,
17793) -> __m256 {
17794    static_assert_imm8_scale!(SCALE);
17795    transmute(vgatherdps_256(
17796        src.as_f32x8(),
17797        base_addr as _,
17798        vindex.as_i32x8(),
17799        k,
17800        SCALE,
17801    ))
17802}
17803
17804/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17805/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17806/// mask bit is not set).
17807///
17808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
17809#[inline]
17810#[target_feature(enable = "avx512f,avx512vl")]
17811#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17812#[rustc_legacy_const_generics(4)]
17813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17814pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
17815    src: __m128i,
17816    k: __mmask8,
17817    vindex: __m256i,
17818    base_addr: *const i32,
17819) -> __m128i {
17820    static_assert_imm8_scale!(SCALE);
17821    transmute(vpgatherqd_256(
17822        src.as_i32x4(),
17823        base_addr as _,
17824        vindex.as_i64x4(),
17825        k,
17826        SCALE,
17827    ))
17828}
17829
17830/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17831/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17832/// mask bit is not set).
17833///
17834/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
17835#[inline]
17836#[target_feature(enable = "avx512f,avx512vl")]
17837#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17838#[rustc_legacy_const_generics(4)]
17839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17840pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
17841    src: __m256i,
17842    k: __mmask8,
17843    vindex: __m256i,
17844    base_addr: *const i64,
17845) -> __m256i {
17846    static_assert_imm8_scale!(SCALE);
17847    transmute(vpgatherqq_256(
17848        src.as_i64x4(),
17849        base_addr as _,
17850        vindex.as_i64x4(),
17851        k,
17852        SCALE,
17853    ))
17854}
17855
17856/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17857/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17858/// from src when the corresponding mask bit is not set).
17859///
17860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
17861#[inline]
17862#[target_feature(enable = "avx512f,avx512vl")]
17863#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17864#[rustc_legacy_const_generics(4)]
17865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17866pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
17867    src: __m256d,
17868    k: __mmask8,
17869    vindex: __m256i,
17870    base_addr: *const f64,
17871) -> __m256d {
17872    static_assert_imm8_scale!(SCALE);
17873    transmute(vgatherqpd_256(
17874        src.as_f64x4(),
17875        base_addr as _,
17876        vindex.as_i64x4(),
17877        k,
17878        SCALE,
17879    ))
17880}
17881
17882/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17883/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17884/// from src when the corresponding mask bit is not set).
17885///
17886/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
17887#[inline]
17888#[target_feature(enable = "avx512f,avx512vl")]
17889#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17890#[rustc_legacy_const_generics(4)]
17891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17892pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
17893    src: __m128,
17894    k: __mmask8,
17895    vindex: __m256i,
17896    base_addr: *const f32,
17897) -> __m128 {
17898    static_assert_imm8_scale!(SCALE);
17899    transmute(vgatherqps_256(
17900        src.as_f32x4(),
17901        base_addr as _,
17902        vindex.as_i64x4(),
17903        k,
17904        SCALE,
17905    ))
17906}
17907
17908/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17909/// indices stored in vindex scaled by scale
17910///
17911/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
17912#[inline]
17913#[target_feature(enable = "avx512f,avx512vl")]
17914#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17915#[rustc_legacy_const_generics(3)]
17916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17917pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
17918    base_addr: *mut i32,
17919    vindex: __m128i,
17920    a: __m128i,
17921) {
17922    static_assert_imm8_scale!(SCALE);
17923    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17924}
17925
17926/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17927/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17928/// are not written to memory).
17929///
17930/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
17931#[inline]
17932#[target_feature(enable = "avx512f,avx512vl")]
17933#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17934#[rustc_legacy_const_generics(4)]
17935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17936pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
17937    base_addr: *mut i32,
17938    k: __mmask8,
17939    vindex: __m128i,
17940    a: __m128i,
17941) {
17942    static_assert_imm8_scale!(SCALE);
17943    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17944}
17945
17946/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17947/// indices stored in vindex scaled by scale
17948///
17949/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
17950#[inline]
17951#[target_feature(enable = "avx512f,avx512vl")]
17952#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17953#[rustc_legacy_const_generics(3)]
17954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17955pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
17956    base_addr: *mut i64,
17957    vindex: __m128i,
17958    a: __m128i,
17959) {
17960    static_assert_imm8_scale!(SCALE);
17961    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17962}
17963
17964/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17965/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17966/// are not written to memory).
17967///
17968/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
17969#[inline]
17970#[target_feature(enable = "avx512f,avx512vl")]
17971#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17972#[rustc_legacy_const_generics(4)]
17973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17974pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
17975    base_addr: *mut i64,
17976    k: __mmask8,
17977    vindex: __m128i,
17978    a: __m128i,
17979) {
17980    static_assert_imm8_scale!(SCALE);
17981    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17982}
17983
17984/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17985/// at packed 32-bit integer indices stored in vindex scaled by scale
17986///
17987/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
17988#[inline]
17989#[target_feature(enable = "avx512f,avx512vl")]
17990#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17991#[rustc_legacy_const_generics(3)]
17992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17993pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
17994    base_addr: *mut f64,
17995    vindex: __m128i,
17996    a: __m128d,
17997) {
17998    static_assert_imm8_scale!(SCALE);
17999    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18000}
18001
18002/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18003/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18004/// mask bit is not set are not written to memory).
18005///
18006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
18007#[inline]
18008#[target_feature(enable = "avx512f,avx512vl")]
18009#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18010#[rustc_legacy_const_generics(4)]
18011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18012pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
18013    base_addr: *mut f64,
18014    k: __mmask8,
18015    vindex: __m128i,
18016    a: __m128d,
18017) {
18018    static_assert_imm8_scale!(SCALE);
18019    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18020}
18021
18022/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18023/// at packed 32-bit integer indices stored in vindex scaled by scale
18024///
18025/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
18026#[inline]
18027#[target_feature(enable = "avx512f,avx512vl")]
18028#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18029#[rustc_legacy_const_generics(3)]
18030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18031pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18032    static_assert_imm8_scale!(SCALE);
18033    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18034}
18035
18036/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18037/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18038/// mask bit is not set are not written to memory).
18039///
18040/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
18041#[inline]
18042#[target_feature(enable = "avx512f,avx512vl")]
18043#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18044#[rustc_legacy_const_generics(4)]
18045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18046pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
18047    base_addr: *mut f32,
18048    k: __mmask8,
18049    vindex: __m128i,
18050    a: __m128,
18051) {
18052    static_assert_imm8_scale!(SCALE);
18053    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18054}
18055
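// Hypothetical usage sketch (illustrative only): the 128-bit scatter writes at
// most four lanes, so only the low four bits of the mask are meaningful; lanes
// with a clear bit leave memory untouched.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm_mask_i32scatter_ps(buf: &mut [f32; 4]) {
    unsafe {
        let vindex = _mm_setr_epi32(3, 2, 1, 0);
        let a = _mm_set1_ps(1.5);
        // Write lanes 0 and 1 only, i.e. buf[3] and buf[2].
        _mm_mask_i32scatter_ps::<4>(buf.as_mut_ptr(), 0b0011, vindex, a);
    }
}
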
18056/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18057/// indices stored in vindex scaled by scale
18058///
18059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
18060#[inline]
18061#[target_feature(enable = "avx512f,avx512vl")]
18062#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18063#[rustc_legacy_const_generics(3)]
18064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18065pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
18066    base_addr: *mut i32,
18067    vindex: __m128i,
18068    a: __m128i,
18069) {
18070    static_assert_imm8_scale!(SCALE);
18071    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18072}
18073
18074/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18075/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18076/// are not written to memory).
18077///
18078/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18079#[inline]
18080#[target_feature(enable = "avx512f,avx512vl")]
18081#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18082#[rustc_legacy_const_generics(4)]
18083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18084pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18085    base_addr: *mut i32,
18086    k: __mmask8,
18087    vindex: __m128i,
18088    a: __m128i,
18089) {
18090    static_assert_imm8_scale!(SCALE);
18091    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18092}
18093
18094/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18095/// indices stored in vindex scaled by scale
18096///
18097/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18098#[inline]
18099#[target_feature(enable = "avx512f,avx512vl")]
18100#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18101#[rustc_legacy_const_generics(3)]
18102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18103pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18104    base_addr: *mut i64,
18105    vindex: __m128i,
18106    a: __m128i,
18107) {
18108    static_assert_imm8_scale!(SCALE);
18109    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18110}
18111
18112/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18113/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18114/// are not written to memory).
18115///
18116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18117#[inline]
18118#[target_feature(enable = "avx512f,avx512vl")]
18119#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18120#[rustc_legacy_const_generics(4)]
18121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18122pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18123    base_addr: *mut i64,
18124    k: __mmask8,
18125    vindex: __m128i,
18126    a: __m128i,
18127) {
18128    static_assert_imm8_scale!(SCALE);
18129    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18130}
18131
18132/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18133/// at packed 64-bit integer indices stored in vindex scaled by scale
18134///
18135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18136#[inline]
18137#[target_feature(enable = "avx512f,avx512vl")]
18138#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18139#[rustc_legacy_const_generics(3)]
18140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18141pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
18142    base_addr: *mut f64,
18143    vindex: __m128i,
18144    a: __m128d,
18145) {
18146    static_assert_imm8_scale!(SCALE);
18147    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18148}
18149
18150/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18151/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18152/// mask bit is not set are not written to memory).
18153///
18154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18155#[inline]
18156#[target_feature(enable = "avx512f,avx512vl")]
18157#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18158#[rustc_legacy_const_generics(4)]
18159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18160pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18161    base_addr: *mut f64,
18162    k: __mmask8,
18163    vindex: __m128i,
18164    a: __m128d,
18165) {
18166    static_assert_imm8_scale!(SCALE);
18167    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18168}
18169
18170/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18171/// at packed 64-bit integer indices stored in vindex scaled by scale
18172///
18173/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18174#[inline]
18175#[target_feature(enable = "avx512f,avx512vl")]
18176#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18177#[rustc_legacy_const_generics(3)]
18178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18179pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18180    static_assert_imm8_scale!(SCALE);
18181    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18182}
18183
18184/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18185/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18186/// mask bit is not set are not written to memory).
///
18187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18188#[inline]
18189#[target_feature(enable = "avx512f,avx512vl")]
18190#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18191#[rustc_legacy_const_generics(4)]
18192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18193pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18194    base_addr: *mut f32,
18195    k: __mmask8,
18196    vindex: __m128i,
18197    a: __m128,
18198) {
18199    static_assert_imm8_scale!(SCALE);
18200    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18201}
18202
18203/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18204/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18205/// mask bit is not set).
18206///
18207/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18208#[inline]
18209#[target_feature(enable = "avx512f,avx512vl")]
18210#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18211#[rustc_legacy_const_generics(4)]
18212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18213pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18214    src: __m128i,
18215    k: __mmask8,
18216    vindex: __m128i,
18217    base_addr: *const i32,
18218) -> __m128i {
18219    static_assert_imm8_scale!(SCALE);
18220    transmute(vpgatherdd_128(
18221        src.as_i32x4(),
18222        base_addr as _,
18223        vindex.as_i32x4(),
18224        k,
18225        SCALE,
18226    ))
18227}
18228
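// Hypothetical usage sketch (illustrative only): as with the wider forms,
// disabled lanes of the 128-bit masked gather are copied from `src` and their
// memory is not read.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm_mmask_i32gather_epi32(table: &[i32; 4]) -> __m128i {
    unsafe {
        let src = _mm_set1_epi32(0);
        let vindex = _mm_setr_epi32(0, 1, 2, 3);
        _mm_mmask_i32gather_epi32::<4>(src, 0b1111, vindex, table.as_ptr())
    }
}
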
18229/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18230/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18231/// mask bit is not set).
18232///
18233/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18234#[inline]
18235#[target_feature(enable = "avx512f,avx512vl")]
18236#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18237#[rustc_legacy_const_generics(4)]
18238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18239pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18240    src: __m128i,
18241    k: __mmask8,
18242    vindex: __m128i,
18243    base_addr: *const i64,
18244) -> __m128i {
18245    static_assert_imm8_scale!(SCALE);
18246    transmute(vpgatherdq_128(
18247        src.as_i64x2(),
18248        base_addr as _,
18249        vindex.as_i32x4(),
18250        k,
18251        SCALE,
18252    ))
18253}
18254
18255/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18256/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18257/// from src when the corresponding mask bit is not set).
18258///
18259/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18260#[inline]
18261#[target_feature(enable = "avx512f,avx512vl")]
18262#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18263#[rustc_legacy_const_generics(4)]
18264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18265pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18266    src: __m128d,
18267    k: __mmask8,
18268    vindex: __m128i,
18269    base_addr: *const f64,
18270) -> __m128d {
18271    static_assert_imm8_scale!(SCALE);
18272    transmute(vgatherdpd_128(
18273        src.as_f64x2(),
18274        base_addr as _,
18275        vindex.as_i32x4(),
18276        k,
18277        SCALE,
18278    ))
18279}
18280
18281/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18282/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18283/// from src when the corresponding mask bit is not set).
18284///
18285/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18286#[inline]
18287#[target_feature(enable = "avx512f,avx512vl")]
18288#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18289#[rustc_legacy_const_generics(4)]
18290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18291pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18292    src: __m128,
18293    k: __mmask8,
18294    vindex: __m128i,
18295    base_addr: *const f32,
18296) -> __m128 {
18297    static_assert_imm8_scale!(SCALE);
18298    transmute(vgatherdps_128(
18299        src.as_f32x4(),
18300        base_addr as _,
18301        vindex.as_i32x4(),
18302        k,
18303        SCALE,
18304    ))
18305}
18306
18307/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18308/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18309/// mask bit is not set).
18310///
18311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18312#[inline]
18313#[target_feature(enable = "avx512f,avx512vl")]
18314#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18315#[rustc_legacy_const_generics(4)]
18316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18317pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18318    src: __m128i,
18319    k: __mmask8,
18320    vindex: __m128i,
18321    base_addr: *const i32,
18322) -> __m128i {
18323    static_assert_imm8_scale!(SCALE);
18324    transmute(vpgatherqd_128(
18325        src.as_i32x4(),
18326        base_addr as _,
18327        vindex.as_i64x2(),
18328        k,
18329        SCALE,
18330    ))
18331}
18332
18333/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18334/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18335/// mask bit is not set).
18336///
18337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18338#[inline]
18339#[target_feature(enable = "avx512f,avx512vl")]
18340#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18341#[rustc_legacy_const_generics(4)]
18342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18343pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18344    src: __m128i,
18345    k: __mmask8,
18346    vindex: __m128i,
18347    base_addr: *const i64,
18348) -> __m128i {
18349    static_assert_imm8_scale!(SCALE);
18350    transmute(vpgatherqq_128(
18351        src.as_i64x2(),
18352        base_addr as _,
18353        vindex.as_i64x2(),
18354        k,
18355        SCALE,
18356    ))
18357}
18358
18359/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18360/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18361/// from src when the corresponding mask bit is not set).
18362///
18363/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
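///
/// A minimal usage sketch with 64-bit indices (illustrative only, not compiled as a doctest;
/// assumes `avx512f` and `avx512vl` support has been verified and uses a hypothetical `table`
/// array):
///
/// ```ignore
/// let table: [f64; 4] = [1.0, 2.0, 3.0, 4.0];
/// let vindex = _mm_set_epi64x(3, 1); // lane 0 = 1, lane 1 = 3
/// let src = _mm_set1_pd(-1.0);
/// let k: __mmask8 = 0b01; // only lane 0 is active
/// // SCALE = 8: the 64-bit indices are scaled by the element size in bytes.
/// let r = unsafe { _mm_mmask_i64gather_pd::<8>(src, k, vindex, table.as_ptr()) };
/// // r = [2.0, -1.0]; the masked-off lane 1 keeps its value from `src`.
/// ```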
18364#[inline]
18365#[target_feature(enable = "avx512f,avx512vl")]
18366#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18367#[rustc_legacy_const_generics(4)]
18368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18369pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18370    src: __m128d,
18371    k: __mmask8,
18372    vindex: __m128i,
18373    base_addr: *const f64,
18374) -> __m128d {
18375    static_assert_imm8_scale!(SCALE);
18376    transmute(vgatherqpd_128(
18377        src.as_f64x2(),
18378        base_addr as _,
18379        vindex.as_i64x2(),
18380        k,
18381        SCALE,
18382    ))
18383}
18384
18385/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18386/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18387/// from src when the corresponding mask bit is not set).
18388///
18389/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18390#[inline]
18391#[target_feature(enable = "avx512f,avx512vl")]
18392#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18393#[rustc_legacy_const_generics(4)]
18394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18395pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18396    src: __m128,
18397    k: __mmask8,
18398    vindex: __m128i,
18399    base_addr: *const f32,
18400) -> __m128 {
18401    static_assert_imm8_scale!(SCALE);
18402    transmute(vgatherqps_128(
18403        src.as_f32x4(),
18404        base_addr as _,
18405        vindex.as_i64x2(),
18406        k,
18407        SCALE,
18408    ))
18409}
18410
18411/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18412///
18413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
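///
/// A minimal sketch of the compress behaviour (illustrative only, not compiled as a doctest;
/// assumes it runs in an `avx512f`-enabled context, e.g. inside a
/// `#[target_feature(enable = "avx512f")]` function):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let src = _mm512_set1_epi32(-1);
/// let k: __mmask16 = 0b1010_1010_1010_1010; // odd lanes are active
/// let r = _mm512_mask_compress_epi32(src, k, a);
/// // The active (odd-indexed) elements are packed toward the low lanes:
/// // r = [1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1]
/// ```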
18414#[inline]
18415#[target_feature(enable = "avx512f")]
18416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18417#[cfg_attr(test, assert_instr(vpcompressd))]
18418pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18419    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
18420}
18421
18422/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18423///
18424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18425#[inline]
18426#[target_feature(enable = "avx512f")]
18427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18428#[cfg_attr(test, assert_instr(vpcompressd))]
18429pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18430    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
18431}
18432
18433/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18434///
18435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18436#[inline]
18437#[target_feature(enable = "avx512f,avx512vl")]
18438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18439#[cfg_attr(test, assert_instr(vpcompressd))]
18440pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18441    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18442}
18443
18444/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18445///
18446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18447#[inline]
18448#[target_feature(enable = "avx512f,avx512vl")]
18449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18450#[cfg_attr(test, assert_instr(vpcompressd))]
18451pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
18452    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18453}
18454
18455/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18456///
18457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18458#[inline]
18459#[target_feature(enable = "avx512f,avx512vl")]
18460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18461#[cfg_attr(test, assert_instr(vpcompressd))]
18462pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18463    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18464}
18465
18466/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18467///
18468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18469#[inline]
18470#[target_feature(enable = "avx512f,avx512vl")]
18471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18472#[cfg_attr(test, assert_instr(vpcompressd))]
18473pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
18474    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18475}
18476
18477/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18478///
18479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18480#[inline]
18481#[target_feature(enable = "avx512f")]
18482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18483#[cfg_attr(test, assert_instr(vpcompressq))]
18484pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18485    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18486}
18487
18488/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18489///
18490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18491#[inline]
18492#[target_feature(enable = "avx512f")]
18493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18494#[cfg_attr(test, assert_instr(vpcompressq))]
18495pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
18496    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18497}
18498
18499/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18500///
18501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18502#[inline]
18503#[target_feature(enable = "avx512f,avx512vl")]
18504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18505#[cfg_attr(test, assert_instr(vpcompressq))]
18506pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18507    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18508}
18509
18510/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18511///
18512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18513#[inline]
18514#[target_feature(enable = "avx512f,avx512vl")]
18515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18516#[cfg_attr(test, assert_instr(vpcompressq))]
18517pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
18518    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18519}
18520
18521/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18522///
18523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18524#[inline]
18525#[target_feature(enable = "avx512f,avx512vl")]
18526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18527#[cfg_attr(test, assert_instr(vpcompressq))]
18528pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18529    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18530}
18531
18532/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18533///
18534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18535#[inline]
18536#[target_feature(enable = "avx512f,avx512vl")]
18537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18538#[cfg_attr(test, assert_instr(vpcompressq))]
18539pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
18540    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18541}
18542
18543/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18544///
18545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18546#[inline]
18547#[target_feature(enable = "avx512f")]
18548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18549#[cfg_attr(test, assert_instr(vcompressps))]
18550pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18551    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18552}
18553
18554/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18555///
18556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18557#[inline]
18558#[target_feature(enable = "avx512f")]
18559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18560#[cfg_attr(test, assert_instr(vcompressps))]
18561pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
18562    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18563}
18564
18565/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18566///
18567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18568#[inline]
18569#[target_feature(enable = "avx512f,avx512vl")]
18570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18571#[cfg_attr(test, assert_instr(vcompressps))]
18572pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18573    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18574}
18575
18576/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18577///
18578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18579#[inline]
18580#[target_feature(enable = "avx512f,avx512vl")]
18581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18582#[cfg_attr(test, assert_instr(vcompressps))]
18583pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
18584    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18585}
18586
18587/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18588///
18589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18590#[inline]
18591#[target_feature(enable = "avx512f,avx512vl")]
18592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18593#[cfg_attr(test, assert_instr(vcompressps))]
18594pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18595    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
18596}
18597
18598/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18599///
18600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
18601#[inline]
18602#[target_feature(enable = "avx512f,avx512vl")]
18603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18604#[cfg_attr(test, assert_instr(vcompressps))]
18605pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
18606    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
18607}
18608
18609/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18610///
18611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
18612#[inline]
18613#[target_feature(enable = "avx512f")]
18614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18615#[cfg_attr(test, assert_instr(vcompresspd))]
18616pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
18617    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
18618}
18619
18620/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18621///
18622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
18623#[inline]
18624#[target_feature(enable = "avx512f")]
18625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18626#[cfg_attr(test, assert_instr(vcompresspd))]
18627pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
18628    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
18629}
18630
18631/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18632///
18633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
18634#[inline]
18635#[target_feature(enable = "avx512f,avx512vl")]
18636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18637#[cfg_attr(test, assert_instr(vcompresspd))]
18638pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
18639    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
18640}
18641
18642/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18643///
18644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
18645#[inline]
18646#[target_feature(enable = "avx512f,avx512vl")]
18647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18648#[cfg_attr(test, assert_instr(vcompresspd))]
18649pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
18650    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
18651}
18652
18653/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18654///
18655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
18656#[inline]
18657#[target_feature(enable = "avx512f,avx512vl")]
18658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18659#[cfg_attr(test, assert_instr(vcompresspd))]
18660pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
18661    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
18662}
18663
18664/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18665///
18666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
18667#[inline]
18668#[target_feature(enable = "avx512f,avx512vl")]
18669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18670#[cfg_attr(test, assert_instr(vcompresspd))]
18671pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
18672    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
18673}
18674
18675/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18676///
18677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
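///
/// A minimal sketch (illustrative only, not compiled as a doctest; assumes `avx512f` support has
/// been verified and uses a hypothetical `out` buffer):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let k: __mmask16 = 0b0000_0000_0001_0101; // lanes 0, 2 and 4 are active
/// let mut out = [0i32; 16];
/// unsafe { _mm512_mask_compressstoreu_epi32(out.as_mut_ptr(), k, a) };
/// // Only the three active elements are written, contiguously: out[..3] == [0, 2, 4];
/// // the rest of `out` is left untouched.
/// ```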
18678#[inline]
18679#[target_feature(enable = "avx512f")]
18680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18681#[cfg_attr(test, assert_instr(vpcompressd))]
18682pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
18683    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
18684}
18685
18686/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18687///
18688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
18689#[inline]
18690#[target_feature(enable = "avx512f,avx512vl")]
18691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18692#[cfg_attr(test, assert_instr(vpcompressd))]
18693pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
18694    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
18695}
18696
18697/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18698///
18699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
18700#[inline]
18701#[target_feature(enable = "avx512f,avx512vl")]
18702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18703#[cfg_attr(test, assert_instr(vpcompressd))]
18704pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
18705    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
18706}
18707
18708/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18709///
18710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
18711#[inline]
18712#[target_feature(enable = "avx512f")]
18713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18714#[cfg_attr(test, assert_instr(vpcompressq))]
18715pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
18716    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
18717}
18718
18719/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18720///
18721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
18722#[inline]
18723#[target_feature(enable = "avx512f,avx512vl")]
18724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18725#[cfg_attr(test, assert_instr(vpcompressq))]
18726pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
18727    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
18728}
18729
18730/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18731///
18732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
18733#[inline]
18734#[target_feature(enable = "avx512f,avx512vl")]
18735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18736#[cfg_attr(test, assert_instr(vpcompressq))]
18737pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
18738    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
18739}
18740
18741/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18742///
18743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
18744#[inline]
18745#[target_feature(enable = "avx512f")]
18746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18747#[cfg_attr(test, assert_instr(vcompressps))]
18748pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
18749    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
18750}
18751
18752/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18753///
18754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
18755#[inline]
18756#[target_feature(enable = "avx512f,avx512vl")]
18757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18758#[cfg_attr(test, assert_instr(vcompressps))]
18759pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
18760    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
18761}
18762
18763/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18764///
18765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
18766#[inline]
18767#[target_feature(enable = "avx512f,avx512vl")]
18768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18769#[cfg_attr(test, assert_instr(vcompressps))]
18770pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
18771    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
18772}
18773
18774/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18775///
18776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
18777#[inline]
18778#[target_feature(enable = "avx512f")]
18779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18780#[cfg_attr(test, assert_instr(vcompresspd))]
18781pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
18782    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
18783}
18784
18785/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18786///
18787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
18788#[inline]
18789#[target_feature(enable = "avx512f,avx512vl")]
18790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18791#[cfg_attr(test, assert_instr(vcompresspd))]
18792pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
18793    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
18794}
18795
18796/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18797///
18798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
18799#[inline]
18800#[target_feature(enable = "avx512f,avx512vl")]
18801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18802#[cfg_attr(test, assert_instr(vcompresspd))]
18803pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
18804    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
18805}
18806
18807/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18808///
18809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
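///
/// A minimal sketch of the expand behaviour (illustrative only, not compiled as a doctest;
/// assumes an `avx512f`-enabled context):
///
/// ```ignore
/// // The low, contiguous elements of `a` are distributed into the lanes selected by `k`;
/// // the other lanes are copied from `src`.
/// let a = _mm512_setr_epi32(100, 101, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let src = _mm512_set1_epi32(-1);
/// let k: __mmask16 = 0b0000_0000_0101_0001; // lanes 0, 4 and 6 are active
/// let r = _mm512_mask_expand_epi32(src, k, a);
/// // r = [100, -1, -1, -1, 101, -1, 102, -1, -1, -1, -1, -1, -1, -1, -1, -1]
/// ```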
18810#[inline]
18811#[target_feature(enable = "avx512f")]
18812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18813#[cfg_attr(test, assert_instr(vpexpandd))]
18814pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18815    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
18816}
18817
18818/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18819///
18820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
18821#[inline]
18822#[target_feature(enable = "avx512f")]
18823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18824#[cfg_attr(test, assert_instr(vpexpandd))]
18825pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
18826    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
18827}
18828
18829/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18830///
18831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
18832#[inline]
18833#[target_feature(enable = "avx512f,avx512vl")]
18834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18835#[cfg_attr(test, assert_instr(vpexpandd))]
18836pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18837    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
18838}
18839
18840/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18841///
18842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
18843#[inline]
18844#[target_feature(enable = "avx512f,avx512vl")]
18845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18846#[cfg_attr(test, assert_instr(vpexpandd))]
18847pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
18848    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
18849}
18850
18851/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18852///
18853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
18854#[inline]
18855#[target_feature(enable = "avx512f,avx512vl")]
18856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18857#[cfg_attr(test, assert_instr(vpexpandd))]
18858pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18859    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
18860}
18861
18862/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18863///
18864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
18865#[inline]
18866#[target_feature(enable = "avx512f,avx512vl")]
18867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18868#[cfg_attr(test, assert_instr(vpexpandd))]
18869pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
18870    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
18871}
18872
18873/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18874///
18875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
18876#[inline]
18877#[target_feature(enable = "avx512f")]
18878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18879#[cfg_attr(test, assert_instr(vpexpandq))]
18880pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18881    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
18882}
18883
18884/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18885///
18886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
18887#[inline]
18888#[target_feature(enable = "avx512f")]
18889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18890#[cfg_attr(test, assert_instr(vpexpandq))]
18891pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
18892    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
18893}
18894
18895/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18896///
18897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
18898#[inline]
18899#[target_feature(enable = "avx512f,avx512vl")]
18900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18901#[cfg_attr(test, assert_instr(vpexpandq))]
18902pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18903    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
18904}
18905
18906/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18907///
18908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
18909#[inline]
18910#[target_feature(enable = "avx512f,avx512vl")]
18911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18912#[cfg_attr(test, assert_instr(vpexpandq))]
18913pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
18914    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
18915}
18916
18917/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18918///
18919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
18920#[inline]
18921#[target_feature(enable = "avx512f,avx512vl")]
18922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18923#[cfg_attr(test, assert_instr(vpexpandq))]
18924pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18925    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
18926}
18927
18928/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18929///
18930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
18931#[inline]
18932#[target_feature(enable = "avx512f,avx512vl")]
18933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18934#[cfg_attr(test, assert_instr(vpexpandq))]
18935pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
18936    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
18937}
18938
18939/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18940///
18941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
18942#[inline]
18943#[target_feature(enable = "avx512f")]
18944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18945#[cfg_attr(test, assert_instr(vexpandps))]
18946pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18947    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
18948}
18949
18950/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18951///
18952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
18953#[inline]
18954#[target_feature(enable = "avx512f")]
18955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18956#[cfg_attr(test, assert_instr(vexpandps))]
18957pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
18958    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
18959}
18960
18961/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18962///
18963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
18964#[inline]
18965#[target_feature(enable = "avx512f,avx512vl")]
18966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18967#[cfg_attr(test, assert_instr(vexpandps))]
18968pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18969    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
18970}
18971
18972/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18973///
18974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
18975#[inline]
18976#[target_feature(enable = "avx512f,avx512vl")]
18977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18978#[cfg_attr(test, assert_instr(vexpandps))]
18979pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
18980    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
18981}
18982
18983/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18984///
18985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
18986#[inline]
18987#[target_feature(enable = "avx512f,avx512vl")]
18988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18989#[cfg_attr(test, assert_instr(vexpandps))]
18990pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18991    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
18992}
18993
18994/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18995///
18996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
18997#[inline]
18998#[target_feature(enable = "avx512f,avx512vl")]
18999#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19000#[cfg_attr(test, assert_instr(vexpandps))]
19001pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
19002    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
19003}
19004
19005/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19006///
19007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
19008#[inline]
19009#[target_feature(enable = "avx512f")]
19010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19011#[cfg_attr(test, assert_instr(vexpandpd))]
19012pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
19013    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
19014}
19015
19016/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19017///
19018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
19019#[inline]
19020#[target_feature(enable = "avx512f")]
19021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19022#[cfg_attr(test, assert_instr(vexpandpd))]
19023pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
19024    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
19025}
19026
19027/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19028///
19029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
19030#[inline]
19031#[target_feature(enable = "avx512f,avx512vl")]
19032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19033#[cfg_attr(test, assert_instr(vexpandpd))]
19034pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
19035    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
19036}
19037
19038/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19039///
19040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
19041#[inline]
19042#[target_feature(enable = "avx512f,avx512vl")]
19043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19044#[cfg_attr(test, assert_instr(vexpandpd))]
19045pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
19046    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
19047}
19048
19049/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19050///
19051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
19052#[inline]
19053#[target_feature(enable = "avx512f,avx512vl")]
19054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19055#[cfg_attr(test, assert_instr(vexpandpd))]
19056pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
19057    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
19058}
19059
19060/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19061///
19062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
19063#[inline]
19064#[target_feature(enable = "avx512f,avx512vl")]
19065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19066#[cfg_attr(test, assert_instr(vexpandpd))]
19067pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
19068    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
19069}
19070
19071/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19072///
19073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
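///
/// A minimal sketch (illustrative only, not compiled as a doctest; assumes an `avx512f`-enabled
/// context):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x0012_3456);
/// // Rotate every 32-bit lane left by 8 bits; bits shifted out on the left
/// // re-enter on the right.
/// let r = _mm512_rol_epi32::<8>(a);
/// // every lane is now 0x1234_5600
/// ```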
19074#[inline]
19075#[target_feature(enable = "avx512f")]
19076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19077#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19078#[rustc_legacy_const_generics(1)]
19079pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19080    unsafe {
19081        static_assert_uimm_bits!(IMM8, 8);
19082        let a = a.as_i32x16();
19083        let r = vprold(a, IMM8);
19084        transmute(r)
19085    }
19086}
19087
19088/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19089///
19090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19091#[inline]
19092#[target_feature(enable = "avx512f")]
19093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19094#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19095#[rustc_legacy_const_generics(3)]
19096pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19097    unsafe {
19098        static_assert_uimm_bits!(IMM8, 8);
19099        let a = a.as_i32x16();
19100        let r = vprold(a, IMM8);
19101        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19102    }
19103}
19104
19105/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19106///
19107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19108#[inline]
19109#[target_feature(enable = "avx512f")]
19110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19111#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19112#[rustc_legacy_const_generics(2)]
19113pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19114    unsafe {
19115        static_assert_uimm_bits!(IMM8, 8);
19116        let a = a.as_i32x16();
19117        let r = vprold(a, IMM8);
19118        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19119    }
19120}
19121
19122/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19123///
19124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19125#[inline]
19126#[target_feature(enable = "avx512f,avx512vl")]
19127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19128#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19129#[rustc_legacy_const_generics(1)]
19130pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19131    unsafe {
19132        static_assert_uimm_bits!(IMM8, 8);
19133        let a = a.as_i32x8();
19134        let r = vprold256(a, IMM8);
19135        transmute(r)
19136    }
19137}
19138
19139/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19140///
19141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19142#[inline]
19143#[target_feature(enable = "avx512f,avx512vl")]
19144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19145#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19146#[rustc_legacy_const_generics(3)]
19147pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19148    unsafe {
19149        static_assert_uimm_bits!(IMM8, 8);
19150        let a = a.as_i32x8();
19151        let r = vprold256(a, IMM8);
19152        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19153    }
19154}
19155
19156/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19157///
19158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19159#[inline]
19160#[target_feature(enable = "avx512f,avx512vl")]
19161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19162#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19163#[rustc_legacy_const_generics(2)]
19164pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19165    unsafe {
19166        static_assert_uimm_bits!(IMM8, 8);
19167        let a = a.as_i32x8();
19168        let r = vprold256(a, IMM8);
19169        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19170    }
19171}
19172
19173/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19174///
19175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19176#[inline]
19177#[target_feature(enable = "avx512f,avx512vl")]
19178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19179#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19180#[rustc_legacy_const_generics(1)]
19181pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19182    unsafe {
19183        static_assert_uimm_bits!(IMM8, 8);
19184        let a = a.as_i32x4();
19185        let r = vprold128(a, IMM8);
19186        transmute(r)
19187    }
19188}
19189
19190/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19191///
19192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19193#[inline]
19194#[target_feature(enable = "avx512f,avx512vl")]
19195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19196#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19197#[rustc_legacy_const_generics(3)]
19198pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19199    unsafe {
19200        static_assert_uimm_bits!(IMM8, 8);
19201        let a = a.as_i32x4();
19202        let r = vprold128(a, IMM8);
19203        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19204    }
19205}
19206
19207/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19208///
19209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19210#[inline]
19211#[target_feature(enable = "avx512f,avx512vl")]
19212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19213#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19214#[rustc_legacy_const_generics(2)]
19215pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19216    unsafe {
19217        static_assert_uimm_bits!(IMM8, 8);
19218        let a = a.as_i32x4();
19219        let r = vprold128(a, IMM8);
19220        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19221    }
19222}
19223
19224/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19225///
19226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
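///
/// A minimal sketch (illustrative only, not compiled as a doctest; assumes an `avx512f`-enabled
/// context):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x0012_3456);
/// // Rotate every 32-bit lane right by 8 bits; bits shifted out on the right
/// // re-enter on the left.
/// let r = _mm512_ror_epi32::<8>(a);
/// // every lane is now 0x5600_1234
/// ```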
19227#[inline]
19228#[target_feature(enable = "avx512f")]
19229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19230#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19231#[rustc_legacy_const_generics(1)]
19232pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19233    unsafe {
19234        static_assert_uimm_bits!(IMM8, 8);
19235        let a = a.as_i32x16();
19236        let r = vprord(a, IMM8);
19237        transmute(r)
19238    }
19239}
19240
19241/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19242///
19243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19244#[inline]
19245#[target_feature(enable = "avx512f")]
19246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19247#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19248#[rustc_legacy_const_generics(3)]
19249pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19250    unsafe {
19251        static_assert_uimm_bits!(IMM8, 8);
19252        let a = a.as_i32x16();
19253        let r = vprord(a, IMM8);
19254        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19255    }
19256}
19257
19258/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19259///
19260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19261#[inline]
19262#[target_feature(enable = "avx512f")]
19263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19264#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19265#[rustc_legacy_const_generics(2)]
19266pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19267    unsafe {
19268        static_assert_uimm_bits!(IMM8, 8);
19269        let a = a.as_i32x16();
19270        let r = vprord(a, IMM8);
19271        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19272    }
19273}
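
// Illustrative sketch (not part of the original source): rotating all sixteen 32-bit lanes
// right by a constant and merging the result under a writemask. Names and values are
// hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn ror_epi32_512_sketch() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(0b0110);
    // Each lane holds 0b0110; rotated right by 1 it becomes 0b0011.
    let rotated = _mm512_ror_epi32::<1>(a);
    // Writemask 0x5555 selects the even lanes; the odd lanes keep the zeros from `src`.
    let src = _mm512_setzero_si512();
    let merged = _mm512_mask_ror_epi32::<1>(src, 0x5555, a);
    (rotated, merged)
}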
19274
19275/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19276///
19277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19278#[inline]
19279#[target_feature(enable = "avx512f,avx512vl")]
19280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19281#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19282#[rustc_legacy_const_generics(1)]
19283pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19284    unsafe {
19285        static_assert_uimm_bits!(IMM8, 8);
19286        let a = a.as_i32x8();
19287        let r = vprord256(a, IMM8);
19288        transmute(r)
19289    }
19290}
19291
19292/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19293///
19294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19295#[inline]
19296#[target_feature(enable = "avx512f,avx512vl")]
19297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19298#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19299#[rustc_legacy_const_generics(3)]
19300pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19301    unsafe {
19302        static_assert_uimm_bits!(IMM8, 8);
19303        let a = a.as_i32x8();
19304        let r = vprord256(a, IMM8);
19305        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19306    }
19307}
19308
19309/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19310///
19311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19312#[inline]
19313#[target_feature(enable = "avx512f,avx512vl")]
19314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19315#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19316#[rustc_legacy_const_generics(2)]
19317pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19318    unsafe {
19319        static_assert_uimm_bits!(IMM8, 8);
19320        let a = a.as_i32x8();
19321        let r = vprord256(a, IMM8);
19322        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19323    }
19324}
19325
19326/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19327///
19328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19329#[inline]
19330#[target_feature(enable = "avx512f,avx512vl")]
19331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19332#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19333#[rustc_legacy_const_generics(1)]
19334pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19335    unsafe {
19336        static_assert_uimm_bits!(IMM8, 8);
19337        let a = a.as_i32x4();
19338        let r = vprord128(a, IMM8);
19339        transmute(r)
19340    }
19341}
19342
19343/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19344///
19345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19346#[inline]
19347#[target_feature(enable = "avx512f,avx512vl")]
19348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19349#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19350#[rustc_legacy_const_generics(3)]
19351pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19352    unsafe {
19353        static_assert_uimm_bits!(IMM8, 8);
19354        let a = a.as_i32x4();
19355        let r = vprord128(a, IMM8);
19356        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19357    }
19358}
19359
19360/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19361///
19362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19363#[inline]
19364#[target_feature(enable = "avx512f,avx512vl")]
19365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19366#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19367#[rustc_legacy_const_generics(2)]
19368pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19369    unsafe {
19370        static_assert_uimm_bits!(IMM8, 8);
19371        let a = a.as_i32x4();
19372        let r = vprord128(a, IMM8);
19373        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19374    }
19375}
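
// Illustrative sketch (not part of the original source): the 256-bit and 128-bit
// rotate-right forms behave like the 512-bit one, only the vector and mask widths change.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn ror_epi32_narrow_sketch() -> (__m256i, __m128i) {
    let a256 = _mm256_set1_epi32(1);
    // Rotating 1 right by 1 moves the bit into the sign position: every selected lane
    // becomes i32::MIN, and mask 0xFF selects all eight lanes.
    let r256 = _mm256_maskz_ror_epi32::<1>(0xFF, a256);
    let a128 = _mm_set1_epi32(1);
    let r128 = _mm_ror_epi32::<1>(a128);
    (r256, r128)
}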
19376
19377/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19378///
19379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19380#[inline]
19381#[target_feature(enable = "avx512f")]
19382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19383#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19384#[rustc_legacy_const_generics(1)]
19385pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19386    unsafe {
19387        static_assert_uimm_bits!(IMM8, 8);
19388        let a = a.as_i64x8();
19389        let r = vprolq(a, IMM8);
19390        transmute(r)
19391    }
19392}
19393
19394/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19395///
19396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19397#[inline]
19398#[target_feature(enable = "avx512f")]
19399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19400#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19401#[rustc_legacy_const_generics(3)]
19402pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19403    unsafe {
19404        static_assert_uimm_bits!(IMM8, 8);
19405        let a = a.as_i64x8();
19406        let r = vprolq(a, IMM8);
19407        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19408    }
19409}
19410
19411/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19412///
19413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19414#[inline]
19415#[target_feature(enable = "avx512f")]
19416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19417#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19418#[rustc_legacy_const_generics(2)]
19419pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19420    unsafe {
19421        static_assert_uimm_bits!(IMM8, 8);
19422        let a = a.as_i64x8();
19423        let r = vprolq(a, IMM8);
19424        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19425    }
19426}
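
// Illustrative sketch (not part of the original source): 64-bit rotate-left on a 512-bit
// vector, unmasked and zero-masked. Names and values are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rol_epi64_512_sketch() -> (__m512i, __m512i) {
    // Only the sign bit is set in every 64-bit lane.
    let a = _mm512_set1_epi64(i64::MIN);
    // Rotating left by 1 wraps that bit around to bit 0: every lane becomes 1.
    let rotated = _mm512_rol_epi64::<1>(a);
    // Zeromask 0b0000_1111: the lower four lanes hold 1, the upper four are zeroed.
    let zeroed = _mm512_maskz_rol_epi64::<1>(0b0000_1111, a);
    (rotated, zeroed)
}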
19427
19428/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19429///
19430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19431#[inline]
19432#[target_feature(enable = "avx512f,avx512vl")]
19433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19434#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19435#[rustc_legacy_const_generics(1)]
19436pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19437    unsafe {
19438        static_assert_uimm_bits!(IMM8, 8);
19439        let a = a.as_i64x4();
19440        let r = vprolq256(a, IMM8);
19441        transmute(r)
19442    }
19443}
19444
19445/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19446///
19447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19448#[inline]
19449#[target_feature(enable = "avx512f,avx512vl")]
19450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19451#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19452#[rustc_legacy_const_generics(3)]
19453pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19454    unsafe {
19455        static_assert_uimm_bits!(IMM8, 8);
19456        let a = a.as_i64x4();
19457        let r = vprolq256(a, IMM8);
19458        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19459    }
19460}
19461
19462/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19463///
19464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19465#[inline]
19466#[target_feature(enable = "avx512f,avx512vl")]
19467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19468#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19469#[rustc_legacy_const_generics(2)]
19470pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19471    unsafe {
19472        static_assert_uimm_bits!(IMM8, 8);
19473        let a = a.as_i64x4();
19474        let r = vprolq256(a, IMM8);
19475        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19476    }
19477}
19478
19479/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19480///
19481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19482#[inline]
19483#[target_feature(enable = "avx512f,avx512vl")]
19484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19485#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19486#[rustc_legacy_const_generics(1)]
19487pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19488    unsafe {
19489        static_assert_uimm_bits!(IMM8, 8);
19490        let a = a.as_i64x2();
19491        let r = vprolq128(a, IMM8);
19492        transmute(r)
19493    }
19494}
19495
19496/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19497///
19498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19499#[inline]
19500#[target_feature(enable = "avx512f,avx512vl")]
19501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19502#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19503#[rustc_legacy_const_generics(3)]
19504pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19505    unsafe {
19506        static_assert_uimm_bits!(IMM8, 8);
19507        let a = a.as_i64x2();
19508        let r = vprolq128(a, IMM8);
19509        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19510    }
19511}
19512
19513/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19514///
19515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19516#[inline]
19517#[target_feature(enable = "avx512f,avx512vl")]
19518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19519#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19520#[rustc_legacy_const_generics(2)]
19521pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19522    unsafe {
19523        static_assert_uimm_bits!(IMM8, 8);
19524        let a = a.as_i64x2();
19525        let r = vprolq128(a, IMM8);
19526        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19527    }
19528}
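
// Illustrative sketch (not part of the original source): the 128-bit masked rotate-left,
// merging into an all-zero `src`. Names and values are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn rol_epi64_128_sketch() -> __m128i {
    let a = _mm_set1_epi64x(0xF);
    let src = _mm_setzero_si128();
    // Writemask 0b01: lane 0 becomes 0xF rotated left by 4 (0xF0); lane 1 keeps the zero
    // copied from `src`.
    _mm_mask_rol_epi64::<4>(src, 0b01, a)
}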
19529
19530/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19531///
19532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19533#[inline]
19534#[target_feature(enable = "avx512f")]
19535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19536#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19537#[rustc_legacy_const_generics(1)]
19538pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19539    unsafe {
19540        static_assert_uimm_bits!(IMM8, 8);
19541        let a = a.as_i64x8();
19542        let r = vprorq(a, IMM8);
19543        transmute(r)
19544    }
19545}
19546
19547/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19548///
19549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19550#[inline]
19551#[target_feature(enable = "avx512f")]
19552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19553#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19554#[rustc_legacy_const_generics(3)]
19555pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19556    unsafe {
19557        static_assert_uimm_bits!(IMM8, 8);
19558        let a = a.as_i64x8();
19559        let r = vprorq(a, IMM8);
19560        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19561    }
19562}
19563
19564/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19565///
19566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19567#[inline]
19568#[target_feature(enable = "avx512f")]
19569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19570#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19571#[rustc_legacy_const_generics(2)]
19572pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19573    unsafe {
19574        static_assert_uimm_bits!(IMM8, 8);
19575        let a = a.as_i64x8();
19576        let r = vprorq(a, IMM8);
19577        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19578    }
19579}
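
// Illustrative sketch (not part of the original source): 64-bit rotate-right on a 512-bit
// vector.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn ror_epi64_512_sketch() -> __m512i {
    let a = _mm512_set1_epi64(1);
    // Rotating right by 1 moves bit 0 into bit 63, so every lane becomes i64::MIN.
    _mm512_ror_epi64::<1>(a)
}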
19580
19581/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19582///
19583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19584#[inline]
19585#[target_feature(enable = "avx512f,avx512vl")]
19586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19587#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19588#[rustc_legacy_const_generics(1)]
19589pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19590    unsafe {
19591        static_assert_uimm_bits!(IMM8, 8);
19592        let a = a.as_i64x4();
19593        let r = vprorq256(a, IMM8);
19594        transmute(r)
19595    }
19596}
19597
19598/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19599///
19600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19601#[inline]
19602#[target_feature(enable = "avx512f,avx512vl")]
19603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19604#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19605#[rustc_legacy_const_generics(3)]
19606pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19607    unsafe {
19608        static_assert_uimm_bits!(IMM8, 8);
19609        let a = a.as_i64x4();
19610        let r = vprorq256(a, IMM8);
19611        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19612    }
19613}
19614
19615/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19616///
19617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19618#[inline]
19619#[target_feature(enable = "avx512f,avx512vl")]
19620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19621#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19622#[rustc_legacy_const_generics(2)]
19623pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19624    unsafe {
19625        static_assert_uimm_bits!(IMM8, 8);
19626        let a = a.as_i64x4();
19627        let r = vprorq256(a, IMM8);
19628        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19629    }
19630}
19631
19632/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19633///
19634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19635#[inline]
19636#[target_feature(enable = "avx512f,avx512vl")]
19637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19638#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19639#[rustc_legacy_const_generics(1)]
19640pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19641    unsafe {
19642        static_assert_uimm_bits!(IMM8, 8);
19643        let a = a.as_i64x2();
19644        let r = vprorq128(a, IMM8);
19645        transmute(r)
19646    }
19647}
19648
19649/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19650///
19651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19652#[inline]
19653#[target_feature(enable = "avx512f,avx512vl")]
19654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19655#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19656#[rustc_legacy_const_generics(3)]
19657pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19658    unsafe {
19659        static_assert_uimm_bits!(IMM8, 8);
19660        let a = a.as_i64x2();
19661        let r = vprorq128(a, IMM8);
19662        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19663    }
19664}
19665
19666/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19667///
19668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
19669#[inline]
19670#[target_feature(enable = "avx512f,avx512vl")]
19671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19672#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19673#[rustc_legacy_const_generics(2)]
19674pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19675    unsafe {
19676        static_assert_uimm_bits!(IMM8, 8);
19677        let a = a.as_i64x2();
19678        let r = vprorq128(a, IMM8);
19679        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19680    }
19681}
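
// Illustrative sketch (not part of the original source): the narrower rotate-right forms.
// Names and values are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn ror_epi64_narrow_sketch() -> (__m256i, __m128i) {
    let a256 = _mm256_set1_epi64x(0x100);
    // Each lane rotated right by 8 becomes 1; zeromask 0b1010 keeps only lanes 1 and 3
    // and clears lanes 0 and 2.
    let r256 = _mm256_maskz_ror_epi64::<8>(0b1010, a256);
    let a128 = _mm_set1_epi64x(0x100);
    let r128 = _mm_ror_epi64::<8>(a128);
    (r256, r128)
}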
19682
19683/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19684///
19685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
19686#[inline]
19687#[target_feature(enable = "avx512f")]
19688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19689#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19690#[rustc_legacy_const_generics(1)]
19691pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19692    unsafe {
19693        static_assert_uimm_bits!(IMM8, 8);
        // Shifting a 32-bit lane by 32 or more would be UB for `simd_shl`; `vpslld`
        // defines such counts to produce all-zero lanes, so handle that case explicitly.
19694        if IMM8 >= 32 {
19695            _mm512_setzero_si512()
19696        } else {
19697            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
19698        }
19699    }
19700}
19701
19702/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19703///
19704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
19705#[inline]
19706#[target_feature(enable = "avx512f")]
19707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19708#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19709#[rustc_legacy_const_generics(3)]
19710pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19711    unsafe {
19712        static_assert_uimm_bits!(IMM8, 8);
19713        let shf = if IMM8 >= 32 {
19714            u32x16::ZERO
19715        } else {
19716            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
19717        };
19718        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19719    }
19720}
19721
19722/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19723///
19724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
19725#[inline]
19726#[target_feature(enable = "avx512f")]
19727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19728#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19729#[rustc_legacy_const_generics(2)]
19730pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19731    unsafe {
19732        static_assert_uimm_bits!(IMM8, 8);
19733        if IMM8 >= 32 {
19734            _mm512_setzero_si512()
19735        } else {
19736            let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
19737            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19738        }
19739    }
19740}
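
// Illustrative sketch (not part of the original source): the immediate shift-left on
// sixteen 32-bit lanes, including the out-of-range case handled above.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn slli_epi32_512_sketch() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(3);
    // Each lane becomes 3 << 4 = 48; bits shifted past bit 31 are discarded.
    let shifted = _mm512_slli_epi32::<4>(a);
    // A count of 32 or more yields all-zero lanes rather than wrapping the count.
    let zeroed = _mm512_slli_epi32::<32>(a);
    (shifted, zeroed)
}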
19741
19742/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19743///
19744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
19745#[inline]
19746#[target_feature(enable = "avx512f,avx512vl")]
19747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19748#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19749#[rustc_legacy_const_generics(3)]
19750pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19751    unsafe {
19752        static_assert_uimm_bits!(IMM8, 8);
19753        let r = if IMM8 >= 32 {
19754            u32x8::ZERO
19755        } else {
19756            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
19757        };
19758        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19759    }
19760}
19761
19762/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19763///
19764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
19765#[inline]
19766#[target_feature(enable = "avx512f,avx512vl")]
19767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19768#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19769#[rustc_legacy_const_generics(2)]
19770pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19771    unsafe {
19772        static_assert_uimm_bits!(IMM8, 8);
19773        if IMM8 >= 32 {
19774            _mm256_setzero_si256()
19775        } else {
19776            let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
19777            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19778        }
19779    }
19780}
19781
19782/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19783///
19784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
19785#[inline]
19786#[target_feature(enable = "avx512f,avx512vl")]
19787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19788#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19789#[rustc_legacy_const_generics(3)]
19790pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19791    unsafe {
19792        static_assert_uimm_bits!(IMM8, 8);
19793        let r = if IMM8 >= 32 {
19794            u32x4::ZERO
19795        } else {
19796            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
19797        };
19798        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19799    }
19800}
19801
19802/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19803///
19804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
19805#[inline]
19806#[target_feature(enable = "avx512f,avx512vl")]
19807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19808#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19809#[rustc_legacy_const_generics(2)]
19810pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19811    unsafe {
19812        static_assert_uimm_bits!(IMM8, 8);
19813        if IMM8 >= 32 {
19814            _mm_setzero_si128()
19815        } else {
19816            let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
19817            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19818        }
19819    }
19820}
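
// Illustrative sketch (not part of the original source): the masked 256-bit and 128-bit
// shift-left forms. Names and values are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn slli_epi32_narrow_sketch() -> (__m256i, __m128i) {
    let a256 = _mm256_set1_epi32(1);
    let src = _mm256_set1_epi32(-1);
    // Writemask 0b0000_1111: lanes 0..=3 hold 1 << 3 = 8, lanes 4..=7 keep the -1 from `src`.
    let merged = _mm256_mask_slli_epi32::<3>(src, 0b0000_1111, a256);
    let a128 = _mm_set1_epi32(1);
    // Zeromask 0b0011: lanes 0 and 1 hold 8, lanes 2 and 3 are cleared.
    let zeroed = _mm_maskz_slli_epi32::<3>(0b0011, a128);
    (merged, zeroed)
}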
19821
19822/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
19823///
19824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
19825#[inline]
19826#[target_feature(enable = "avx512f")]
19827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19828#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19829#[rustc_legacy_const_generics(1)]
19830pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19831    unsafe {
19832        static_assert_uimm_bits!(IMM8, 8);
19833        if IMM8 >= 32 {
19834            _mm512_setzero_si512()
19835        } else {
19836            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
19837        }
19838    }
19839}
19840
19841/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19842///
19843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
19844#[inline]
19845#[target_feature(enable = "avx512f")]
19846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19847#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19848#[rustc_legacy_const_generics(3)]
19849pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19850    unsafe {
19851        static_assert_uimm_bits!(IMM8, 8);
19852        let shf = if IMM8 >= 32 {
19853            u32x16::ZERO
19854        } else {
19855            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
19856        };
19857        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19858    }
19859}
19860
19861/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19862///
19863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
19864#[inline]
19865#[target_feature(enable = "avx512f")]
19866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19867#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19868#[rustc_legacy_const_generics(2)]
19869pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19870    unsafe {
19871        static_assert_uimm_bits!(IMM8, 8);
19872        if IMM8 >= 32 {
19873            _mm512_setzero_si512()
19874        } else {
19875            let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
19876            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19877        }
19878    }
19879}
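
// Illustrative sketch (not part of the original source): `srli` is a logical shift, so
// zeros (not copies of the sign bit) are shifted in from the left.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srli_epi32_512_sketch() -> __m512i {
    // Every lane holds -1 (all bits set); shifted right by 1 it becomes 0x7FFF_FFFF.
    let a = _mm512_set1_epi32(-1);
    _mm512_srli_epi32::<1>(a)
}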
19880
19881/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19882///
19883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
19884#[inline]
19885#[target_feature(enable = "avx512f,avx512vl")]
19886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19887#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19888#[rustc_legacy_const_generics(3)]
19889pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19890    unsafe {
19891        static_assert_uimm_bits!(IMM8, 8);
19892        let r = if IMM8 >= 32 {
19893            u32x8::ZERO
19894        } else {
19895            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
19896        };
19897        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19898    }
19899}
19900
19901/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19902///
19903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
19904#[inline]
19905#[target_feature(enable = "avx512f,avx512vl")]
19906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19907#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19908#[rustc_legacy_const_generics(2)]
19909pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19910    unsafe {
19911        static_assert_uimm_bits!(IMM8, 8);
19912        if IMM8 >= 32 {
19913            _mm256_setzero_si256()
19914        } else {
19915            let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
19916            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19917        }
19918    }
19919}
19920
19921/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19922///
19923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
19924#[inline]
19925#[target_feature(enable = "avx512f,avx512vl")]
19926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19927#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19928#[rustc_legacy_const_generics(3)]
19929pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19930    unsafe {
19931        static_assert_uimm_bits!(IMM8, 8);
19932        let r = if IMM8 >= 32 {
19933            u32x4::ZERO
19934        } else {
19935            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
19936        };
19937        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19938    }
19939}
19940
19941/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19942///
19943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
19944#[inline]
19945#[target_feature(enable = "avx512f,avx512vl")]
19946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19947#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19948#[rustc_legacy_const_generics(2)]
19949pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19950    unsafe {
19951        static_assert_uimm_bits!(IMM8, 8);
19952        if IMM8 >= 32 {
19953            _mm_setzero_si128()
19954        } else {
19955            let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
19956            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19957        }
19958    }
19959}
19960
19961/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19962///
19963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
19964#[inline]
19965#[target_feature(enable = "avx512f")]
19966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19967#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19968#[rustc_legacy_const_generics(1)]
19969pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19970    unsafe {
19971        static_assert_uimm_bits!(IMM8, 8);
        // Same guard as the 32-bit form: counts of 64 or more would be UB for `simd_shl`
        // and are defined by the hardware to yield zero.
19972        if IMM8 >= 64 {
19973            _mm512_setzero_si512()
19974        } else {
19975            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
19976        }
19977    }
19978}
19979
19980/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19981///
19982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
19983#[inline]
19984#[target_feature(enable = "avx512f")]
19985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19986#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19987#[rustc_legacy_const_generics(3)]
19988pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19989    unsafe {
19990        static_assert_uimm_bits!(IMM8, 8);
19991        let shf = if IMM8 >= 64 {
19992            u64x8::ZERO
19993        } else {
19994            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
19995        };
19996        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
19997    }
19998}
19999
20000/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20001///
20002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
20003#[inline]
20004#[target_feature(enable = "avx512f")]
20005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20006#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20007#[rustc_legacy_const_generics(2)]
20008pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20009    unsafe {
20010        static_assert_uimm_bits!(IMM8, 8);
20011        if IMM8 >= 64 {
20012            _mm512_setzero_si512()
20013        } else {
20014            let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
20015            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20016        }
20017    }
20018}
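
// Illustrative sketch (not part of the original source): the 64-bit immediate shift-left,
// which accepts counts up to 63 before the zeroing case above applies.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn slli_epi64_512_sketch() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi64(1);
    // Each lane becomes 1 << 40.
    let shifted = _mm512_slli_epi64::<40>(a);
    // Writemask 0b0101_0101: even lanes receive 1 << 40, odd lanes keep the zeros from `src`.
    let merged = _mm512_mask_slli_epi64::<40>(_mm512_setzero_si512(), 0b0101_0101, a);
    (shifted, merged)
}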
20019
20020/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20021///
20022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
20023#[inline]
20024#[target_feature(enable = "avx512f,avx512vl")]
20025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20026#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20027#[rustc_legacy_const_generics(3)]
20028pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20029    unsafe {
20030        static_assert_uimm_bits!(IMM8, 8);
20031        let r = if IMM8 >= 64 {
20032            u64x4::ZERO
20033        } else {
20034            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
20035        };
20036        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20037    }
20038}
20039
20040/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20041///
20042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
20043#[inline]
20044#[target_feature(enable = "avx512f,avx512vl")]
20045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20046#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20047#[rustc_legacy_const_generics(2)]
20048pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20049    unsafe {
20050        static_assert_uimm_bits!(IMM8, 8);
20051        if IMM8 >= 64 {
20052            _mm256_setzero_si256()
20053        } else {
20054            let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
20055            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20056        }
20057    }
20058}
20059
20060/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20061///
20062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
20063#[inline]
20064#[target_feature(enable = "avx512f,avx512vl")]
20065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20066#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20067#[rustc_legacy_const_generics(3)]
20068pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20069    unsafe {
20070        static_assert_uimm_bits!(IMM8, 8);
20071        let r = if IMM8 >= 64 {
20072            u64x2::ZERO
20073        } else {
20074            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20075        };
20076        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20077    }
20078}
20079
20080/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20081///
20082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20083#[inline]
20084#[target_feature(enable = "avx512f,avx512vl")]
20085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20086#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20087#[rustc_legacy_const_generics(2)]
20088pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20089    unsafe {
20090        static_assert_uimm_bits!(IMM8, 8);
20091        if IMM8 >= 64 {
20092            _mm_setzero_si128()
20093        } else {
20094            let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20095            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20096        }
20097    }
20098}
20099
20100/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20101///
20102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20103#[inline]
20104#[target_feature(enable = "avx512f")]
20105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20106#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20107#[rustc_legacy_const_generics(1)]
20108pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20109    unsafe {
20110        static_assert_uimm_bits!(IMM8, 8);
20111        if IMM8 >= 64 {
20112            _mm512_setzero_si512()
20113        } else {
20114            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20115        }
20116    }
20117}
20118
20119/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20120///
20121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20122#[inline]
20123#[target_feature(enable = "avx512f")]
20124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20125#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20126#[rustc_legacy_const_generics(3)]
20127pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20128    unsafe {
20129        static_assert_uimm_bits!(IMM8, 8);
20130        let shf = if IMM8 >= 64 {
20131            u64x8::ZERO
20132        } else {
20133            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
20134        };
20135        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20136    }
20137}
20138
20139/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20140///
20141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20142#[inline]
20143#[target_feature(enable = "avx512f")]
20144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20145#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20146#[rustc_legacy_const_generics(2)]
20147pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20148    unsafe {
20149        static_assert_uimm_bits!(IMM8, 8);
20150        if IMM8 >= 64 {
20151            _mm512_setzero_si512()
20152        } else {
20153            let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
20154            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20155        }
20156    }
20157}
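
// Illustrative sketch (not part of the original source): the 64-bit logical shift-right.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srli_epi64_512_sketch() -> __m512i {
    // Every lane holds -1; a logical shift right by 8 clears the top eight bits instead of
    // replicating the sign bit.
    let a = _mm512_set1_epi64(-1);
    _mm512_srli_epi64::<8>(a)
}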
20158
20159/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20160///
20161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20162#[inline]
20163#[target_feature(enable = "avx512f,avx512vl")]
20164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20165#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20166#[rustc_legacy_const_generics(3)]
20167pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20168    unsafe {
20169        static_assert_uimm_bits!(IMM8, 8);
20170        let r = if IMM8 >= 64 {
20171            u64x4::ZERO
20172        } else {
20173            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
20174        };
20175        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20176    }
20177}
20178
20179/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20180///
20181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20182#[inline]
20183#[target_feature(enable = "avx512f,avx512vl")]
20184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20185#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20186#[rustc_legacy_const_generics(2)]
20187pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20188    unsafe {
20189        static_assert_uimm_bits!(IMM8, 8);
20190        if IMM8 >= 64 {
20191            _mm256_setzero_si256()
20192        } else {
20193            let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
20194            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20195        }
20196    }
20197}
20198
20199/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20200///
20201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20202#[inline]
20203#[target_feature(enable = "avx512f,avx512vl")]
20204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20205#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20206#[rustc_legacy_const_generics(3)]
20207pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20208    unsafe {
20209        static_assert_uimm_bits!(IMM8, 8);
20210        let r = if IMM8 >= 64 {
20211            u64x2::ZERO
20212        } else {
20213            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20214        };
20215        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20216    }
20217}
20218
20219/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20220///
20221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20222#[inline]
20223#[target_feature(enable = "avx512f,avx512vl")]
20224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20225#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20226#[rustc_legacy_const_generics(2)]
20227pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20228    unsafe {
20229        static_assert_uimm_bits!(IMM8, 8);
20230        if IMM8 >= 64 {
20231            _mm_setzero_si128()
20232        } else {
20233            let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20234            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20235        }
20236    }
20237}
20238
20239/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20240///
20241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20242#[inline]
20243#[target_feature(enable = "avx512f")]
20244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20245#[cfg_attr(test, assert_instr(vpslld))]
20246pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
20247    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20248}
20249
20250/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20251///
20252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20253#[inline]
20254#[target_feature(enable = "avx512f")]
20255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20256#[cfg_attr(test, assert_instr(vpslld))]
20257pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20258    unsafe {
20259        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20260        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20261    }
20262}
20263
20264/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20265///
20266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20267#[inline]
20268#[target_feature(enable = "avx512f")]
20269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20270#[cfg_attr(test, assert_instr(vpslld))]
20271pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20272    unsafe {
20273        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20274        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20275    }
20276}
20277
20278/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20279///
20280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20281#[inline]
20282#[target_feature(enable = "avx512f,avx512vl")]
20283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20284#[cfg_attr(test, assert_instr(vpslld))]
20285pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20286    unsafe {
20287        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20288        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20289    }
20290}
20291
20292/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20293///
20294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20295#[inline]
20296#[target_feature(enable = "avx512f,avx512vl")]
20297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20298#[cfg_attr(test, assert_instr(vpslld))]
20299pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20300    unsafe {
20301        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20302        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20303    }
20304}
20305
20306/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20307///
20308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20309#[inline]
20310#[target_feature(enable = "avx512f,avx512vl")]
20311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20312#[cfg_attr(test, assert_instr(vpslld))]
20313pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20314    unsafe {
20315        let shf = _mm_sll_epi32(a, count).as_i32x4();
20316        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20317    }
20318}
20319
20320/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20321///
20322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20323#[inline]
20324#[target_feature(enable = "avx512f,avx512vl")]
20325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20326#[cfg_attr(test, assert_instr(vpslld))]
20327pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20328    unsafe {
20329        let shf = _mm_sll_epi32(a, count).as_i32x4();
20330        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20331    }
20332}
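
// Illustrative sketch (not part of the original source): unlike the `slli` forms above,
// the `sll` intrinsics take a runtime count from the low 64 bits of a __m128i and apply
// it to every lane. Names and values are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn sll_epi32_count_sketch() -> (__m512i, __m128i) {
    // The count register holds 4 in its low 32 bits and zeros elsewhere.
    let count = _mm_cvtsi32_si128(4);
    let a = _mm512_set1_epi32(1);
    // Every lane becomes 1 << 4 = 16.
    let wide = _mm512_sll_epi32(a, count);
    // Masked 128-bit form: lane 0 is shifted, lanes 1..=3 keep the -1 from `src`.
    let src = _mm_set1_epi32(-1);
    let narrow = _mm_mask_sll_epi32(src, 0b0001, _mm_set1_epi32(1), count);
    (wide, narrow)
}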
20333
20334/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20335///
20336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20337#[inline]
20338#[target_feature(enable = "avx512f")]
20339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20340#[cfg_attr(test, assert_instr(vpsrld))]
20341pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20342    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20343}
20344
20345/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20346///
20347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20348#[inline]
20349#[target_feature(enable = "avx512f")]
20350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20351#[cfg_attr(test, assert_instr(vpsrld))]
20352pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20353    unsafe {
20354        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20355        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20356    }
20357}
20358
20359/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20360///
20361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20362#[inline]
20363#[target_feature(enable = "avx512f")]
20364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20365#[cfg_attr(test, assert_instr(vpsrld))]
20366pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20367    unsafe {
20368        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20369        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20370    }
20371}
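
// Illustrative sketch, not part of the original source: the writemask variant
// keeps untouched lanes from `src`, while the zeromask variant clears them.
// The helper name below is hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _srl_epi32_masked_sketch(src: __m512i, a: __m512i, count: __m128i) -> __m512i {
    let k: __mmask16 = 0x00FF; // operate on the low eight lanes only
    // Zeromask form: the high eight lanes are forced to zero.
    let _zeroed = _mm512_maskz_srl_epi32(k, a, count);
    // Writemask form: the high eight lanes are copied from `src` instead.
    _mm512_mask_srl_epi32(src, k, a, count)
}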
20372
20373/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20374///
20375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20376#[inline]
20377#[target_feature(enable = "avx512f,avx512vl")]
20378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20379#[cfg_attr(test, assert_instr(vpsrld))]
20380pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20381    unsafe {
20382        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20383        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20384    }
20385}
20386
20387/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20388///
20389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20390#[inline]
20391#[target_feature(enable = "avx512f,avx512vl")]
20392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20393#[cfg_attr(test, assert_instr(vpsrld))]
20394pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20395    unsafe {
20396        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20397        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20398    }
20399}
20400
20401/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20402///
20403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20404#[inline]
20405#[target_feature(enable = "avx512f,avx512vl")]
20406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20407#[cfg_attr(test, assert_instr(vpsrld))]
20408pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20409    unsafe {
20410        let shf = _mm_srl_epi32(a, count).as_i32x4();
20411        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20412    }
20413}
20414
20415/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20416///
20417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20418#[inline]
20419#[target_feature(enable = "avx512f,avx512vl")]
20420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20421#[cfg_attr(test, assert_instr(vpsrld))]
20422pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20423    unsafe {
20424        let shf = _mm_srl_epi32(a, count).as_i32x4();
20425        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20426    }
20427}
20428
20429/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20430///
20431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20432#[inline]
20433#[target_feature(enable = "avx512f")]
20434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20435#[cfg_attr(test, assert_instr(vpsllq))]
20436pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
20437    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20438}
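
// Illustrative sketch, not part of the original source: the 64-bit left shift
// behaves like the 32-bit form above, and a count of 64 or more clears every
// lane. The helper name below is hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _sll_epi64_sketch() -> __m512i {
    let a = _mm512_set1_epi64(1); // each 64-bit lane holds 1
    let count = _mm_set_epi64x(0, 40); // one uniform shift amount of 40 bits
    _mm512_sll_epi64(a, count) // each lane becomes 1 << 40
}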
20439
20440/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20441///
20442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20443#[inline]
20444#[target_feature(enable = "avx512f")]
20445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20446#[cfg_attr(test, assert_instr(vpsllq))]
20447pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20448    unsafe {
20449        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20450        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20451    }
20452}
20453
20454/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20455///
20456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20457#[inline]
20458#[target_feature(enable = "avx512f")]
20459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20460#[cfg_attr(test, assert_instr(vpsllq))]
20461pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20462    unsafe {
20463        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20464        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20465    }
20466}
20467
20468/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20469///
20470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20471#[inline]
20472#[target_feature(enable = "avx512f,avx512vl")]
20473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20474#[cfg_attr(test, assert_instr(vpsllq))]
20475pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20476    unsafe {
20477        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20478        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20479    }
20480}
20481
20482/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20483///
20484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20485#[inline]
20486#[target_feature(enable = "avx512f,avx512vl")]
20487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20488#[cfg_attr(test, assert_instr(vpsllq))]
20489pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20490    unsafe {
20491        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20492        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20493    }
20494}
20495
20496/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20497///
20498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20499#[inline]
20500#[target_feature(enable = "avx512f,avx512vl")]
20501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20502#[cfg_attr(test, assert_instr(vpsllq))]
20503pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20504    unsafe {
20505        let shf = _mm_sll_epi64(a, count).as_i64x2();
20506        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20507    }
20508}
20509
20510/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20511///
20512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20513#[inline]
20514#[target_feature(enable = "avx512f,avx512vl")]
20515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20516#[cfg_attr(test, assert_instr(vpsllq))]
20517pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20518    unsafe {
20519        let shf = _mm_sll_epi64(a, count).as_i64x2();
20520        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20521    }
20522}
20523
20524/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20525///
20526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
20527#[inline]
20528#[target_feature(enable = "avx512f")]
20529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20530#[cfg_attr(test, assert_instr(vpsrlq))]
20531pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
20532    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20533}
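
// Illustrative sketch, not part of the original source: the logical 64-bit
// right shift always fills with zeros, which is what unsigned data wants.
// The helper name below is hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _srl_epi64_sketch() -> __m512i {
    let a = _mm512_set1_epi64(-1); // all bits set in every lane
    let count = _mm_set_epi64x(0, 63);
    _mm512_srl_epi64(a, count) // each lane becomes 1: only the former sign bit survives
}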
20534
20535/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20536///
20537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20538#[inline]
20539#[target_feature(enable = "avx512f")]
20540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20541#[cfg_attr(test, assert_instr(vpsrlq))]
20542pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20543    unsafe {
20544        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20545        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20546    }
20547}
20548
20549/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20550///
20551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20552#[inline]
20553#[target_feature(enable = "avx512f")]
20554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20555#[cfg_attr(test, assert_instr(vpsrlq))]
20556pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20557    unsafe {
20558        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20559        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20560    }
20561}
20562
20563/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20564///
20565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20566#[inline]
20567#[target_feature(enable = "avx512f,avx512vl")]
20568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20569#[cfg_attr(test, assert_instr(vpsrlq))]
20570pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20571    unsafe {
20572        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20573        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20574    }
20575}
20576
20577/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20578///
20579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20580#[inline]
20581#[target_feature(enable = "avx512f,avx512vl")]
20582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20583#[cfg_attr(test, assert_instr(vpsrlq))]
20584pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20585    unsafe {
20586        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20587        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20588    }
20589}
20590
20591/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20592///
20593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
20594#[inline]
20595#[target_feature(enable = "avx512f,avx512vl")]
20596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20597#[cfg_attr(test, assert_instr(vpsrlq))]
20598pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20599    unsafe {
20600        let shf = _mm_srl_epi64(a, count).as_i64x2();
20601        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20602    }
20603}
20604
20605/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20606///
20607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
20608#[inline]
20609#[target_feature(enable = "avx512f,avx512vl")]
20610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20611#[cfg_attr(test, assert_instr(vpsrlq))]
20612pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20613    unsafe {
20614        let shf = _mm_srl_epi64(a, count).as_i64x2();
20615        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20616    }
20617}
20618
20619/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20620///
20621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
20622#[inline]
20623#[target_feature(enable = "avx512f")]
20624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20625#[cfg_attr(test, assert_instr(vpsrad))]
20626pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
20627    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
20628}
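
// Illustrative sketch, not part of the original source: the arithmetic shift
// replicates the sign bit, so negative lanes stay negative instead of turning
// into large unsigned values. The helper name below is hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _sra_epi32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(-16);
    let count = _mm_set_epi64x(0, 2);
    _mm512_sra_epi32(a, count) // each lane becomes -4
}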
20629
20630/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20631///
20632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
20633#[inline]
20634#[target_feature(enable = "avx512f")]
20635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20636#[cfg_attr(test, assert_instr(vpsrad))]
20637pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20638    unsafe {
20639        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20640        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20641    }
20642}
20643
20644/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20645///
20646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
20647#[inline]
20648#[target_feature(enable = "avx512f")]
20649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20650#[cfg_attr(test, assert_instr(vpsrad))]
20651pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20652    unsafe {
20653        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20654        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20655    }
20656}
20657
20658/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20659///
20660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
20661#[inline]
20662#[target_feature(enable = "avx512f,avx512vl")]
20663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20664#[cfg_attr(test, assert_instr(vpsrad))]
20665pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20666    unsafe {
20667        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20668        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20669    }
20670}
20671
20672/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20673///
20674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
20675#[inline]
20676#[target_feature(enable = "avx512f,avx512vl")]
20677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20678#[cfg_attr(test, assert_instr(vpsrad))]
20679pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20680    unsafe {
20681        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20682        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20683    }
20684}
20685
20686/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20687///
20688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
20689#[inline]
20690#[target_feature(enable = "avx512f,avx512vl")]
20691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20692#[cfg_attr(test, assert_instr(vpsrad))]
20693pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20694    unsafe {
20695        let shf = _mm_sra_epi32(a, count).as_i32x4();
20696        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20697    }
20698}
20699
20700/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20701///
20702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
20703#[inline]
20704#[target_feature(enable = "avx512f,avx512vl")]
20705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20706#[cfg_attr(test, assert_instr(vpsrad))]
20707pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20708    unsafe {
20709        let shf = _mm_sra_epi32(a, count).as_i32x4();
20710        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20711    }
20712}
20713
20714/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20715///
20716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
20717#[inline]
20718#[target_feature(enable = "avx512f")]
20719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20720#[cfg_attr(test, assert_instr(vpsraq))]
20721pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
20722    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
20723}
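
// Illustrative sketch, not part of the original source: a 64-bit arithmetic
// right shift has no SSE2/AVX2 equivalent; AVX-512F adds it for 512-bit vectors
// (and AVX-512VL adds the 256/128-bit forms below). The helper name below is
// hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _sra_epi64_sketch() -> __m512i {
    let a = _mm512_set1_epi64(i64::MIN);
    let count = _mm_set_epi64x(0, 63);
    _mm512_sra_epi64(a, count) // every lane becomes -1: the sign bit is replicated
}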
20724
20725/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20726///
20727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
20728#[inline]
20729#[target_feature(enable = "avx512f")]
20730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20731#[cfg_attr(test, assert_instr(vpsraq))]
20732pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20733    unsafe {
20734        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20735        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20736    }
20737}
20738
20739/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20740///
20741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
20742#[inline]
20743#[target_feature(enable = "avx512f")]
20744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20745#[cfg_attr(test, assert_instr(vpsraq))]
20746pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20747    unsafe {
20748        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20749        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20750    }
20751}
20752
20753/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20754///
20755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
20756#[inline]
20757#[target_feature(enable = "avx512f,avx512vl")]
20758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20759#[cfg_attr(test, assert_instr(vpsraq))]
20760pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
20761    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
20762}
20763
20764/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20765///
20766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
20767#[inline]
20768#[target_feature(enable = "avx512f,avx512vl")]
20769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20770#[cfg_attr(test, assert_instr(vpsraq))]
20771pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20772    unsafe {
20773        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20774        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20775    }
20776}
20777
20778/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20779///
20780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
20781#[inline]
20782#[target_feature(enable = "avx512f,avx512vl")]
20783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20784#[cfg_attr(test, assert_instr(vpsraq))]
20785pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20786    unsafe {
20787        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20788        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20789    }
20790}
20791
20792/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20793///
20794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
20795#[inline]
20796#[target_feature(enable = "avx512f,avx512vl")]
20797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20798#[cfg_attr(test, assert_instr(vpsraq))]
20799pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
20800    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
20801}
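
// Illustrative sketch, not part of the original source: with AVX-512VL the same
// 64-bit arithmetic shift is usable on plain 128-bit vectors. The helper name
// below is hypothetical.
#[target_feature(enable = "avx512f,avx512vl")]
#[allow(dead_code)]
fn _sra_epi64_128_sketch() -> __m128i {
    let a = _mm_set_epi64x(-8, 8); // one negative and one positive lane
    let count = _mm_set_epi64x(0, 3);
    _mm_sra_epi64(a, count) // the negative lane becomes -1, the positive lane becomes 1
}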
20802
20803/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20804///
20805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
20806#[inline]
20807#[target_feature(enable = "avx512f,avx512vl")]
20808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20809#[cfg_attr(test, assert_instr(vpsraq))]
20810pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20811    unsafe {
20812        let shf = _mm_sra_epi64(a, count).as_i64x2();
20813        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20814    }
20815}
20816
20817/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20818///
20819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
20820#[inline]
20821#[target_feature(enable = "avx512f,avx512vl")]
20822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20823#[cfg_attr(test, assert_instr(vpsraq))]
20824pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20825    unsafe {
20826        let shf = _mm_sra_epi64(a, count).as_i64x2();
20827        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20828    }
20829}
20830
20831/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20832///
20833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
20834#[inline]
20835#[target_feature(enable = "avx512f")]
20836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20837#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20838#[rustc_legacy_const_generics(1)]
20839pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20840    unsafe {
20841        static_assert_uimm_bits!(IMM8, 8);
20842        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
20843    }
20844}
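
// Illustrative sketch, not part of the original source: `srai` takes the shift
// count as a const generic, and counts of 32 or more are clamped (matching the
// `IMM8.min(31)` above) so every lane collapses to its sign. The helper name
// below is hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _srai_epi32_sketch(a: __m512i) -> __m512i {
    // Extract the sign of each 32-bit lane: 0 for non-negative values, -1 for negative ones.
    _mm512_srai_epi32::<31>(a)
}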
20845
20846/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20847///
20848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
20849#[inline]
20850#[target_feature(enable = "avx512f")]
20851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20852#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20853#[rustc_legacy_const_generics(3)]
20854pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
20855    unsafe {
20856        static_assert_uimm_bits!(IMM8, 8);
20857        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20858        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
20859    }
20860}
20861
20862/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20863///
20864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
20865#[inline]
20866#[target_feature(enable = "avx512f")]
20867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20868#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20869#[rustc_legacy_const_generics(2)]
20870pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20871    unsafe {
20872        static_assert_uimm_bits!(IMM8, 8);
20873        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20874        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
20875    }
20876}
20877
20878/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20879///
20880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
20881#[inline]
20882#[target_feature(enable = "avx512f,avx512vl")]
20883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20884#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20885#[rustc_legacy_const_generics(3)]
20886pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20887    unsafe {
20888        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20889        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
20890    }
20891}
20892
20893/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20894///
20895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
20896#[inline]
20897#[target_feature(enable = "avx512f,avx512vl")]
20898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20899#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20900#[rustc_legacy_const_generics(2)]
20901pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20902    unsafe {
20903        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20904        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
20905    }
20906}
20907
20908/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20909///
20910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
20911#[inline]
20912#[target_feature(enable = "avx512f,avx512vl")]
20913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20914#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20915#[rustc_legacy_const_generics(3)]
20916pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20917    unsafe {
20918        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20919        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
20920    }
20921}
20922
20923/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20924///
20925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
20926#[inline]
20927#[target_feature(enable = "avx512f,avx512vl")]
20928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20929#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20930#[rustc_legacy_const_generics(2)]
20931pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20932    unsafe {
20933        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20934        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
20935    }
20936}
20937
20938/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20939///
20940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
20941#[inline]
20942#[target_feature(enable = "avx512f")]
20943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20944#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20945#[rustc_legacy_const_generics(1)]
20946pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20947    unsafe {
20948        static_assert_uimm_bits!(IMM8, 8);
20949        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
20950    }
20951}
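
// Illustrative sketch, not part of the original source: the 64-bit immediate
// form clamps the count at 63, matching the `IMM8.min(63)` above. The helper
// name below is hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _srai_epi64_sketch(a: __m512i) -> __m512i {
    // Divide every signed 64-bit lane by 16, rounding toward negative infinity.
    _mm512_srai_epi64::<4>(a)
}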
20952
20953/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20954///
20955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
20956#[inline]
20957#[target_feature(enable = "avx512f")]
20958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20959#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20960#[rustc_legacy_const_generics(3)]
20961pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20962    unsafe {
20963        static_assert_uimm_bits!(IMM8, 8);
20964        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20965        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20966    }
20967}
20968
20969/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20970///
20971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
20972#[inline]
20973#[target_feature(enable = "avx512f")]
20974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20975#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20976#[rustc_legacy_const_generics(2)]
20977pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20978    unsafe {
20979        static_assert_uimm_bits!(IMM8, 8);
20980        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20981        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20982    }
20983}
20984
20985/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20986///
20987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
20988#[inline]
20989#[target_feature(enable = "avx512f,avx512vl")]
20990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20991#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20992#[rustc_legacy_const_generics(1)]
20993pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
20994    unsafe {
20995        static_assert_uimm_bits!(IMM8, 8);
20996        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
20997    }
20998}
20999
21000/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21001///
21002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
21003#[inline]
21004#[target_feature(enable = "avx512f,avx512vl")]
21005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21006#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21007#[rustc_legacy_const_generics(3)]
21008pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
21009    unsafe {
21010        static_assert_uimm_bits!(IMM8, 8);
21011        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
21012        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21013    }
21014}
21015
21016/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21017///
21018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
21019#[inline]
21020#[target_feature(enable = "avx512f,avx512vl")]
21021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21022#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21023#[rustc_legacy_const_generics(2)]
21024pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21025    unsafe {
21026        static_assert_uimm_bits!(IMM8, 8);
21027        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
21028        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21029    }
21030}
21031
21032/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21033///
21034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
21035#[inline]
21036#[target_feature(enable = "avx512f,avx512vl")]
21037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21038#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21039#[rustc_legacy_const_generics(1)]
21040pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
21041    unsafe {
21042        static_assert_uimm_bits!(IMM8, 8);
21043        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
21044    }
21045}
21046
21047/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21048///
21049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
21050#[inline]
21051#[target_feature(enable = "avx512f,avx512vl")]
21052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21053#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21054#[rustc_legacy_const_generics(3)]
21055pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
21056    unsafe {
21057        static_assert_uimm_bits!(IMM8, 8);
21058        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
21059        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21060    }
21061}
21062
21063/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21064///
21065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
21066#[inline]
21067#[target_feature(enable = "avx512f,avx512vl")]
21068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21069#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21070#[rustc_legacy_const_generics(2)]
21071pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21072    unsafe {
21073        static_assert_uimm_bits!(IMM8, 8);
21074        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
21075        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21076    }
21077}
21078
21079/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21080///
21081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
21082#[inline]
21083#[target_feature(enable = "avx512f")]
21084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21085#[cfg_attr(test, assert_instr(vpsravd))]
21086pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
21087    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
21088}
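
// Illustrative sketch, not part of the original source: the `srav` form reads a
// separate shift count from each lane of `count`, unlike `sra`/`srai`, which
// apply one count to all lanes. The helper name below is hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _srav_epi32_sketch(a: __m512i) -> __m512i {
    // Shift lane i right (arithmetically) by i bits.
    let counts = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    _mm512_srav_epi32(a, counts)
}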
21089
21090/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21091///
21092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21093#[inline]
21094#[target_feature(enable = "avx512f")]
21095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21096#[cfg_attr(test, assert_instr(vpsravd))]
21097pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21098    unsafe {
21099        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21100        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21101    }
21102}
21103
21104/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21105///
21106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21107#[inline]
21108#[target_feature(enable = "avx512f")]
21109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21110#[cfg_attr(test, assert_instr(vpsravd))]
21111pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21112    unsafe {
21113        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21114        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21115    }
21116}
21117
21118/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21119///
21120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21121#[inline]
21122#[target_feature(enable = "avx512f,avx512vl")]
21123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21124#[cfg_attr(test, assert_instr(vpsravd))]
21125pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21126    unsafe {
21127        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21128        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21129    }
21130}
21131
21132/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21133///
21134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21135#[inline]
21136#[target_feature(enable = "avx512f,avx512vl")]
21137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21138#[cfg_attr(test, assert_instr(vpsravd))]
21139pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21140    unsafe {
21141        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21142        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21143    }
21144}
21145
21146/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21147///
21148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21149#[inline]
21150#[target_feature(enable = "avx512f,avx512vl")]
21151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21152#[cfg_attr(test, assert_instr(vpsravd))]
21153pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21154    unsafe {
21155        let shf = _mm_srav_epi32(a, count).as_i32x4();
21156        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21157    }
21158}
21159
21160/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21161///
21162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21163#[inline]
21164#[target_feature(enable = "avx512f,avx512vl")]
21165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21166#[cfg_attr(test, assert_instr(vpsravd))]
21167pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21168    unsafe {
21169        let shf = _mm_srav_epi32(a, count).as_i32x4();
21170        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21171    }
21172}
21173
21174/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21175///
21176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
21177#[inline]
21178#[target_feature(enable = "avx512f")]
21179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21180#[cfg_attr(test, assert_instr(vpsravq))]
21181pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
21182    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
21183}
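
// Illustrative sketch, not part of the original source: per-lane 64-bit
// arithmetic shifts; a lane count of 64 or more leaves that lane holding only
// its sign (0 or -1). The helper name below is hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _srav_epi64_sketch(a: __m512i) -> __m512i {
    // Lane 0 is left unshifted, lane 7 is shifted right by 56 bits.
    let counts = _mm512_setr_epi64(0, 8, 16, 24, 32, 40, 48, 56);
    _mm512_srav_epi64(a, counts)
}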
21184
21185/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21186///
21187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21188#[inline]
21189#[target_feature(enable = "avx512f")]
21190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21191#[cfg_attr(test, assert_instr(vpsravq))]
21192pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21193    unsafe {
21194        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21195        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21196    }
21197}
21198
21199/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21200///
21201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21202#[inline]
21203#[target_feature(enable = "avx512f")]
21204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21205#[cfg_attr(test, assert_instr(vpsravq))]
21206pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21207    unsafe {
21208        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21209        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21210    }
21211}
21212
21213/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21214///
21215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21216#[inline]
21217#[target_feature(enable = "avx512f,avx512vl")]
21218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21219#[cfg_attr(test, assert_instr(vpsravq))]
21220pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
21221    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
21222}
21223
21224/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21225///
21226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21227#[inline]
21228#[target_feature(enable = "avx512f,avx512vl")]
21229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21230#[cfg_attr(test, assert_instr(vpsravq))]
21231pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21232    unsafe {
21233        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21234        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21235    }
21236}
21237
21238/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21239///
21240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21241#[inline]
21242#[target_feature(enable = "avx512f,avx512vl")]
21243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21244#[cfg_attr(test, assert_instr(vpsravq))]
21245pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21246    unsafe {
21247        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21248        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21249    }
21250}
21251
21252/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21253///
21254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21255#[inline]
21256#[target_feature(enable = "avx512f,avx512vl")]
21257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21258#[cfg_attr(test, assert_instr(vpsravq))]
21259pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
21260    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
21261}
21262
21263/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21264///
21265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21266#[inline]
21267#[target_feature(enable = "avx512f,avx512vl")]
21268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21269#[cfg_attr(test, assert_instr(vpsravq))]
21270pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21271    unsafe {
21272        let shf = _mm_srav_epi64(a, count).as_i64x2();
21273        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21274    }
21275}
21276
21277/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21278///
21279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21280#[inline]
21281#[target_feature(enable = "avx512f,avx512vl")]
21282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21283#[cfg_attr(test, assert_instr(vpsravq))]
21284pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21285    unsafe {
21286        let shf = _mm_srav_epi64(a, count).as_i64x2();
21287        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21288    }
21289}
21290
21291/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21292///
21293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
21294#[inline]
21295#[target_feature(enable = "avx512f")]
21296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21297#[cfg_attr(test, assert_instr(vprolvd))]
21298pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
21299    unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) }
21300}
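
// Illustrative sketch, not part of the original source: `rolv` rotates rather
// than shifts, so no bits are lost and each lane's rotate amount is taken
// modulo 32. The helper name below is hypothetical.
#[target_feature(enable = "avx512f")]
#[allow(dead_code)]
fn _rolv_epi32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(0x8000_0001u32 as i32); // top and bottom bits set
    let amounts = _mm512_set1_epi32(1); // rotate every lane left by one bit
    _mm512_rolv_epi32(a, amounts) // each lane becomes 0x0000_0003
}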
21301
21302/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21303///
21304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21305#[inline]
21306#[target_feature(enable = "avx512f")]
21307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21308#[cfg_attr(test, assert_instr(vprolvd))]
21309pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21310    unsafe {
21311        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21312        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
21313    }
21314}
21315
21316/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21317///
21318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21319#[inline]
21320#[target_feature(enable = "avx512f")]
21321#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21322#[cfg_attr(test, assert_instr(vprolvd))]
21323pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21324    unsafe {
21325        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21326        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
21327    }
21328}
21329
21330/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21331///
21332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21333#[inline]
21334#[target_feature(enable = "avx512f,avx512vl")]
21335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21336#[cfg_attr(test, assert_instr(vprolvd))]
21337pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
21338    unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) }
21339}
21340
21341/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21342///
21343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21344#[inline]
21345#[target_feature(enable = "avx512f,avx512vl")]
21346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21347#[cfg_attr(test, assert_instr(vprolvd))]
21348pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21349    unsafe {
21350        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21351        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
21352    }
21353}
21354
21355/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21356///
21357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21358#[inline]
21359#[target_feature(enable = "avx512f,avx512vl")]
21360#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21361#[cfg_attr(test, assert_instr(vprolvd))]
21362pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21363    unsafe {
21364        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21365        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
21366    }
21367}
21368
21369/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21370///
21371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21372#[inline]
21373#[target_feature(enable = "avx512f,avx512vl")]
21374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21375#[cfg_attr(test, assert_instr(vprolvd))]
21376pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
21377    unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) }
21378}
21379
21380/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21381///
21382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21383#[inline]
21384#[target_feature(enable = "avx512f,avx512vl")]
21385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21386#[cfg_attr(test, assert_instr(vprolvd))]
21387pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21388    unsafe {
21389        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21390        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
21391    }
21392}
21393
21394/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21395///
21396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21397#[inline]
21398#[target_feature(enable = "avx512f,avx512vl")]
21399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21400#[cfg_attr(test, assert_instr(vprolvd))]
21401pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21402    unsafe {
21403        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21404        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
21405    }
21406}
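
// An illustrative sketch (hypothetical helper, not upstream code): with the
// zeromask variant, lanes whose mask bit is clear become zero instead of
// being copied from a source vector.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_maskz_rolv_epi32_128() -> __m128i {
    let a = _mm_set1_epi32(0x0000_00ff);
    let counts = _mm_set_epi32(12, 8, 4, 0);
    // Mask 0b0111: lanes 0, 1 and 2 receive 0xff rotated left by 0, 4 and 8
    // bits respectively; lane 3 is zeroed.
    _mm_maskz_rolv_epi32(0b0111, a, counts)
}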
21407
21408/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21409///
21410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
21411#[inline]
21412#[target_feature(enable = "avx512f")]
21413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21414#[cfg_attr(test, assert_instr(vprorvd))]
21415pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
21416    unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) }
21417}
21418
21419/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21420///
21421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21422#[inline]
21423#[target_feature(enable = "avx512f")]
21424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21425#[cfg_attr(test, assert_instr(vprorvd))]
21426pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21427    unsafe {
21428        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21429        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21430    }
21431}
21432
21433/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21434///
21435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21436#[inline]
21437#[target_feature(enable = "avx512f")]
21438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21439#[cfg_attr(test, assert_instr(vprorvd))]
21440pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21441    unsafe {
21442        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21443        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21444    }
21445}
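
// An illustrative sketch (hypothetical helper): for 32-bit lanes, a variable
// rotate right by `n` is equivalent to a variable rotate left by `32 - n`,
// because both intrinsics take the rotate count modulo 32.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_rorv_via_rolv(a: __m512i, n: __m512i) -> __m512i {
    let left_counts = _mm512_sub_epi32(_mm512_set1_epi32(32), n);
    // Produces the same lanes as `_mm512_rorv_epi32(a, n)`.
    _mm512_rolv_epi32(a, left_counts)
}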
21446
21447/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21448///
21449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21450#[inline]
21451#[target_feature(enable = "avx512f,avx512vl")]
21452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21453#[cfg_attr(test, assert_instr(vprorvd))]
21454pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
21455    unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) }
21456}
21457
21458/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21459///
21460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
21461#[inline]
21462#[target_feature(enable = "avx512f,avx512vl")]
21463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21464#[cfg_attr(test, assert_instr(vprorvd))]
21465pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21466    unsafe {
21467        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
21468        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
21469    }
21470}
21471
21472/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21473///
21474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
21475#[inline]
21476#[target_feature(enable = "avx512f,avx512vl")]
21477#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21478#[cfg_attr(test, assert_instr(vprorvd))]
21479pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21480    unsafe {
21481        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
21482        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
21483    }
21484}
21485
21486/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21487///
21488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
21489#[inline]
21490#[target_feature(enable = "avx512f,avx512vl")]
21491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21492#[cfg_attr(test, assert_instr(vprorvd))]
21493pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
21494    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
21495}
21496
21497/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21498///
21499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
21500#[inline]
21501#[target_feature(enable = "avx512f,avx512vl")]
21502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21503#[cfg_attr(test, assert_instr(vprorvd))]
21504pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21505    unsafe {
21506        let ror = _mm_rorv_epi32(a, b).as_i32x4();
21507        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
21508    }
21509}
21510
21511/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21512///
21513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
21514#[inline]
21515#[target_feature(enable = "avx512f,avx512vl")]
21516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21517#[cfg_attr(test, assert_instr(vprorvd))]
21518pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21519    unsafe {
21520        let ror = _mm_rorv_epi32(a, b).as_i32x4();
21521        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
21522    }
21523}
21524
21525/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21526///
21527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
21528#[inline]
21529#[target_feature(enable = "avx512f")]
21530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21531#[cfg_attr(test, assert_instr(vprolvq))]
21532pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
21533    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
21534}
21535
21536/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21537///
21538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
21539#[inline]
21540#[target_feature(enable = "avx512f")]
21541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21542#[cfg_attr(test, assert_instr(vprolvq))]
21543pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21544    unsafe {
21545        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
21546        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
21547    }
21548}
21549
21550/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21551///
21552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
21553#[inline]
21554#[target_feature(enable = "avx512f")]
21555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21556#[cfg_attr(test, assert_instr(vprolvq))]
21557pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21558    unsafe {
21559        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
21560        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
21561    }
21562}
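
// An illustrative sketch (hypothetical helper): rotating each 64-bit lane left
// by 32 bits swaps the high and low 32-bit halves of every lane, which is a
// convenient way to exchange interleaved pairs without a shuffle.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_swap_halves_epi64(a: __m512i) -> __m512i {
    _mm512_rolv_epi64(a, _mm512_set1_epi64(32))
}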
21563
21564/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21565///
21566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
21567#[inline]
21568#[target_feature(enable = "avx512f,avx512vl")]
21569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21570#[cfg_attr(test, assert_instr(vprolvq))]
21571pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
21572    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
21573}
21574
21575/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21576///
21577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
21578#[inline]
21579#[target_feature(enable = "avx512f,avx512vl")]
21580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21581#[cfg_attr(test, assert_instr(vprolvq))]
21582pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21583    unsafe {
21584        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
21585        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
21586    }
21587}
21588
21589/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21590///
21591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
21592#[inline]
21593#[target_feature(enable = "avx512f,avx512vl")]
21594#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21595#[cfg_attr(test, assert_instr(vprolvq))]
21596pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21597    unsafe {
21598        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
21599        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
21600    }
21601}
21602
21603/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21604///
21605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
21606#[inline]
21607#[target_feature(enable = "avx512f,avx512vl")]
21608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21609#[cfg_attr(test, assert_instr(vprolvq))]
21610pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
21611    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
21612}
21613
21614/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21615///
21616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
21617#[inline]
21618#[target_feature(enable = "avx512f,avx512vl")]
21619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21620#[cfg_attr(test, assert_instr(vprolvq))]
21621pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21622    unsafe {
21623        let rol = _mm_rolv_epi64(a, b).as_i64x2();
21624        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
21625    }
21626}
21627
21628/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21629///
21630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
21631#[inline]
21632#[target_feature(enable = "avx512f,avx512vl")]
21633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21634#[cfg_attr(test, assert_instr(vprolvq))]
21635pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21636    unsafe {
21637        let rol = _mm_rolv_epi64(a, b).as_i64x2();
21638        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
21639    }
21640}
21641
21642/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21643///
21644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
21645#[inline]
21646#[target_feature(enable = "avx512f")]
21647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21648#[cfg_attr(test, assert_instr(vprorvq))]
21649pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
21650    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
21651}
21652
21653/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21654///
21655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
21656#[inline]
21657#[target_feature(enable = "avx512f")]
21658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21659#[cfg_attr(test, assert_instr(vprorvq))]
21660pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21661    unsafe {
21662        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
21663        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
21664    }
21665}
21666
21667/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21668///
21669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
21670#[inline]
21671#[target_feature(enable = "avx512f")]
21672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21673#[cfg_attr(test, assert_instr(vprorvq))]
21674pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21675    unsafe {
21676        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
21677        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
21678    }
21679}
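
// An illustrative sketch (hypothetical helper): the zeromask variant of the
// 64-bit variable rotate-right, rotating only the even-numbered lanes and
// zeroing the odd ones.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_maskz_rorv_epi64(a: __m512i, counts: __m512i) -> __m512i {
    // `__mmask8` bit i selects 64-bit lane i; 0b0101_0101 keeps lanes 0, 2, 4, 6.
    _mm512_maskz_rorv_epi64(0b0101_0101, a, counts)
}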
21680
21681/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21682///
21683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
21684#[inline]
21685#[target_feature(enable = "avx512f,avx512vl")]
21686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21687#[cfg_attr(test, assert_instr(vprorvq))]
21688pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
21689    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
21690}
21691
21692/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21693///
21694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
21695#[inline]
21696#[target_feature(enable = "avx512f,avx512vl")]
21697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21698#[cfg_attr(test, assert_instr(vprorvq))]
21699pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21700    unsafe {
21701        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
21702        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
21703    }
21704}
21705
21706/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21707///
21708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
21709#[inline]
21710#[target_feature(enable = "avx512f,avx512vl")]
21711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21712#[cfg_attr(test, assert_instr(vprorvq))]
21713pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21714    unsafe {
21715        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
21716        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
21717    }
21718}
21719
21720/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21721///
21722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
21723#[inline]
21724#[target_feature(enable = "avx512f,avx512vl")]
21725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21726#[cfg_attr(test, assert_instr(vprorvq))]
21727pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
21728    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
21729}
21730
21731/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21732///
21733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
21734#[inline]
21735#[target_feature(enable = "avx512f,avx512vl")]
21736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21737#[cfg_attr(test, assert_instr(vprorvq))]
21738pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21739    unsafe {
21740        let ror = _mm_rorv_epi64(a, b).as_i64x2();
21741        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
21742    }
21743}
21744
21745/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21746///
21747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
21748#[inline]
21749#[target_feature(enable = "avx512f,avx512vl")]
21750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21751#[cfg_attr(test, assert_instr(vprorvq))]
21752pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21753    unsafe {
21754        let ror = _mm_rorv_epi64(a, b).as_i64x2();
21755        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
21756    }
21757}
21758
21759/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21760///
21761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
21762#[inline]
21763#[target_feature(enable = "avx512f")]
21764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21765#[cfg_attr(test, assert_instr(vpsllvd))]
21766pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
21767    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
21768}
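
// An illustrative sketch (hypothetical helper): unlike the rotate intrinsics,
// the variable shifts do not reduce the count modulo the lane width, so a
// per-lane count of 32 or more clears that lane to zero.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_sllv_clears_on_overflow() -> __m512i {
    let a = _mm512_set1_epi32(1);
    let counts = _mm512_set1_epi32(32);
    // Every lane becomes 0 because the shift count equals the lane width.
    _mm512_sllv_epi32(a, counts)
}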
21769
21770/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21771///
21772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
21773#[inline]
21774#[target_feature(enable = "avx512f")]
21775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21776#[cfg_attr(test, assert_instr(vpsllvd))]
21777pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21778    unsafe {
21779        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
21780        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21781    }
21782}
21783
21784/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21785///
21786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
21787#[inline]
21788#[target_feature(enable = "avx512f")]
21789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21790#[cfg_attr(test, assert_instr(vpsllvd))]
21791pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21792    unsafe {
21793        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
21794        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21795    }
21796}
21797
21798/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21799///
21800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
21801#[inline]
21802#[target_feature(enable = "avx512f,avx512vl")]
21803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21804#[cfg_attr(test, assert_instr(vpsllvd))]
21805pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21806    unsafe {
21807        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
21808        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21809    }
21810}
21811
21812/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21813///
21814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
21815#[inline]
21816#[target_feature(enable = "avx512f,avx512vl")]
21817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21818#[cfg_attr(test, assert_instr(vpsllvd))]
21819pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21820    unsafe {
21821        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
21822        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21823    }
21824}
21825
21826/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21827///
21828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
21829#[inline]
21830#[target_feature(enable = "avx512f,avx512vl")]
21831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21832#[cfg_attr(test, assert_instr(vpsllvd))]
21833pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21834    unsafe {
21835        let shf = _mm_sllv_epi32(a, count).as_i32x4();
21836        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21837    }
21838}
21839
21840/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21841///
21842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
21843#[inline]
21844#[target_feature(enable = "avx512f,avx512vl")]
21845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21846#[cfg_attr(test, assert_instr(vpsllvd))]
21847pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21848    unsafe {
21849        let shf = _mm_sllv_epi32(a, count).as_i32x4();
21850        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21851    }
21852}
21853
21854/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21855///
21856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
21857#[inline]
21858#[target_feature(enable = "avx512f")]
21859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21860#[cfg_attr(test, assert_instr(vpsrlvd))]
21861pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
21862    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
21863}
21864
21865/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21866///
21867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
21868#[inline]
21869#[target_feature(enable = "avx512f")]
21870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21871#[cfg_attr(test, assert_instr(vpsrlvd))]
21872pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21873    unsafe {
21874        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
21875        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21876    }
21877}
21878
21879/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21880///
21881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
21882#[inline]
21883#[target_feature(enable = "avx512f")]
21884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21885#[cfg_attr(test, assert_instr(vpsrlvd))]
21886pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21887    unsafe {
21888        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
21889        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21890    }
21891}
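
// An illustrative sketch (hypothetical helper): a per-lane rotate can be built
// from the two variable shifts plus an OR, which is the usual AVX2-style
// fallback when `vprolvd` is unavailable. Counts are assumed to be in 0..=31.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_rotl_from_shifts(a: __m512i, counts: __m512i) -> __m512i {
    let hi = _mm512_sllv_epi32(a, counts);
    let lo = _mm512_srlv_epi32(a, _mm512_sub_epi32(_mm512_set1_epi32(32), counts));
    // Equivalent to `_mm512_rolv_epi32(a, counts)` for counts in 0..=31.
    _mm512_or_si512(hi, lo)
}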
21892
21893/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21894///
21895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
21896#[inline]
21897#[target_feature(enable = "avx512f,avx512vl")]
21898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21899#[cfg_attr(test, assert_instr(vpsrlvd))]
21900pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21901    unsafe {
21902        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
21903        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21904    }
21905}
21906
21907/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21908///
21909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
21910#[inline]
21911#[target_feature(enable = "avx512f,avx512vl")]
21912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21913#[cfg_attr(test, assert_instr(vpsrlvd))]
21914pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21915    unsafe {
21916        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
21917        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21918    }
21919}
21920
21921/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21922///
21923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
21924#[inline]
21925#[target_feature(enable = "avx512f,avx512vl")]
21926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21927#[cfg_attr(test, assert_instr(vpsrlvd))]
21928pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21929    unsafe {
21930        let shf = _mm_srlv_epi32(a, count).as_i32x4();
21931        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21932    }
21933}
21934
21935/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21936///
21937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
21938#[inline]
21939#[target_feature(enable = "avx512f,avx512vl")]
21940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21941#[cfg_attr(test, assert_instr(vpsrlvd))]
21942pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21943    unsafe {
21944        let shf = _mm_srlv_epi32(a, count).as_i32x4();
21945        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21946    }
21947}
21948
21949/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21950///
21951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
21952#[inline]
21953#[target_feature(enable = "avx512f")]
21954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21955#[cfg_attr(test, assert_instr(vpsllvq))]
21956pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
21957    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
21958}
21959
21960/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21961///
21962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
21963#[inline]
21964#[target_feature(enable = "avx512f")]
21965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21966#[cfg_attr(test, assert_instr(vpsllvq))]
21967pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21968    unsafe {
21969        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
21970        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21971    }
21972}
21973
21974/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21975///
21976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
21977#[inline]
21978#[target_feature(enable = "avx512f")]
21979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21980#[cfg_attr(test, assert_instr(vpsllvq))]
21981pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21982    unsafe {
21983        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
21984        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21985    }
21986}
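
// An illustrative sketch (hypothetical helper): a variable left shift of each
// 64-bit lane multiplies that lane by a per-lane power of two, e.g. scaling a
// vector of indices by heterogeneous element sizes.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_scale_by_pow2_epi64(values: __m512i, log2_scales: __m512i) -> __m512i {
    _mm512_sllv_epi64(values, log2_scales)
}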
21987
21988/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21989///
21990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
21991#[inline]
21992#[target_feature(enable = "avx512f,avx512vl")]
21993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21994#[cfg_attr(test, assert_instr(vpsllvq))]
21995pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21996    unsafe {
21997        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
21998        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21999    }
22000}
22001
22002/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22003///
22004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
22005#[inline]
22006#[target_feature(enable = "avx512f,avx512vl")]
22007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22008#[cfg_attr(test, assert_instr(vpsllvq))]
22009pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22010    unsafe {
22011        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
22012        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22013    }
22014}
22015
22016/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22017///
22018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
22019#[inline]
22020#[target_feature(enable = "avx512f,avx512vl")]
22021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22022#[cfg_attr(test, assert_instr(vpsllvq))]
22023pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22024    unsafe {
22025        let shf = _mm_sllv_epi64(a, count).as_i64x2();
22026        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22027    }
22028}
22029
22030/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22031///
22032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
22033#[inline]
22034#[target_feature(enable = "avx512f,avx512vl")]
22035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22036#[cfg_attr(test, assert_instr(vpsllvq))]
22037pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22038    unsafe {
22039        let shf = _mm_sllv_epi64(a, count).as_i64x2();
22040        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22041    }
22042}
22043
22044/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22045///
22046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
22047#[inline]
22048#[target_feature(enable = "avx512f")]
22049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22050#[cfg_attr(test, assert_instr(vpsrlvq))]
22051pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
22052    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
22053}
22054
22055/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22056///
22057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
22058#[inline]
22059#[target_feature(enable = "avx512f")]
22060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22061#[cfg_attr(test, assert_instr(vpsrlvq))]
22062pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22063    unsafe {
22064        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22065        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
22066    }
22067}
22068
22069/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22070///
22071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
22072#[inline]
22073#[target_feature(enable = "avx512f")]
22074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22075#[cfg_attr(test, assert_instr(vpsrlvq))]
22076pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22077    unsafe {
22078        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22079        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22080    }
22081}
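
// An illustrative sketch (hypothetical helper): the 64-bit variable right
// shift is a logical shift (zeros are shifted in), so it performs an unsigned
// per-lane division by a power of two.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_div_by_pow2_epu64(values: __m512i, log2_divisors: __m512i) -> __m512i {
    _mm512_srlv_epi64(values, log2_divisors)
}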
22082
22083/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22084///
22085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22086#[inline]
22087#[target_feature(enable = "avx512f,avx512vl")]
22088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22089#[cfg_attr(test, assert_instr(vpsrlvq))]
22090pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22091    unsafe {
22092        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22093        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22094    }
22095}
22096
22097/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22098///
22099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22100#[inline]
22101#[target_feature(enable = "avx512f,avx512vl")]
22102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22103#[cfg_attr(test, assert_instr(vpsrlvq))]
22104pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22105    unsafe {
22106        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22107        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22108    }
22109}
22110
22111/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22112///
22113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22114#[inline]
22115#[target_feature(enable = "avx512f,avx512vl")]
22116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22117#[cfg_attr(test, assert_instr(vpsrlvq))]
22118pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22119    unsafe {
22120        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22121        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22122    }
22123}
22124
22125/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22126///
22127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22128#[inline]
22129#[target_feature(enable = "avx512f,avx512vl")]
22130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22131#[cfg_attr(test, assert_instr(vpsrlvq))]
22132pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22133    unsafe {
22134        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22135        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22136    }
22137}
22138
22139/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22140///
22141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
22142#[inline]
22143#[target_feature(enable = "avx512f")]
22144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22145#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22146#[rustc_legacy_const_generics(1)]
22147pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
22148    unsafe {
22149        static_assert_uimm_bits!(MASK, 8);
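        // Each 2-bit field of MASK selects one of the four floats within a
        // 128-bit lane; the +4, +8 and +12 offsets repeat that selection in
        // the three higher lanes.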
22150        simd_shuffle!(
22151            a,
22152            a,
22153            [
22154                MASK as u32 & 0b11,
22155                (MASK as u32 >> 2) & 0b11,
22156                ((MASK as u32 >> 4) & 0b11),
22157                ((MASK as u32 >> 6) & 0b11),
22158                (MASK as u32 & 0b11) + 4,
22159                ((MASK as u32 >> 2) & 0b11) + 4,
22160                ((MASK as u32 >> 4) & 0b11) + 4,
22161                ((MASK as u32 >> 6) & 0b11) + 4,
22162                (MASK as u32 & 0b11) + 8,
22163                ((MASK as u32 >> 2) & 0b11) + 8,
22164                ((MASK as u32 >> 4) & 0b11) + 8,
22165                ((MASK as u32 >> 6) & 0b11) + 8,
22166                (MASK as u32 & 0b11) + 12,
22167                ((MASK as u32 >> 2) & 0b11) + 12,
22168                ((MASK as u32 >> 4) & 0b11) + 12,
22169                ((MASK as u32 >> 6) & 0b11) + 12,
22170            ],
22171        )
22172    }
22173}
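
// An illustrative sketch (hypothetical helper): the four 2-bit fields of MASK
// pick the source element for positions 0..=3 of every 128-bit lane. With the
// control 0b00_01_10_11, each lane's four floats are reversed in place.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_reverse_within_lanes_ps(a: __m512) -> __m512 {
    _mm512_permute_ps::<0b00_01_10_11>(a)
}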
22174
22175/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22176///
22177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22178#[inline]
22179#[target_feature(enable = "avx512f")]
22180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22181#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22182#[rustc_legacy_const_generics(3)]
22183pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
22184    unsafe {
22185        static_assert_uimm_bits!(MASK, 8);
22186        let r = _mm512_permute_ps::<MASK>(a);
22187        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22188    }
22189}
22190
22191/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22192///
22193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22194#[inline]
22195#[target_feature(enable = "avx512f")]
22196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22197#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22198#[rustc_legacy_const_generics(2)]
22199pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22200    unsafe {
22201        static_assert_uimm_bits!(MASK, 8);
22202        let r = _mm512_permute_ps::<MASK>(a);
22203        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22204    }
22205}
22206
22207/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22208///
22209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22210#[inline]
22211#[target_feature(enable = "avx512f,avx512vl")]
22212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22213#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22214#[rustc_legacy_const_generics(3)]
22215pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
22216    unsafe {
22217        let r = _mm256_permute_ps::<MASK>(a);
22218        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22219    }
22220}
22221
22222/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22223///
22224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22225#[inline]
22226#[target_feature(enable = "avx512f,avx512vl")]
22227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22228#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22229#[rustc_legacy_const_generics(2)]
22230pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22231    unsafe {
22232        let r = _mm256_permute_ps::<MASK>(a);
22233        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22234    }
22235}
22236
22237/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22238///
22239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22240#[inline]
22241#[target_feature(enable = "avx512f,avx512vl")]
22242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22243#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22244#[rustc_legacy_const_generics(3)]
22245pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22246    unsafe {
22247        let r = _mm_permute_ps::<MASK>(a);
22248        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22249    }
22250}
22251
22252/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22253///
22254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22255#[inline]
22256#[target_feature(enable = "avx512f,avx512vl")]
22257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22258#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22259#[rustc_legacy_const_generics(2)]
22260pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22261    unsafe {
22262        let r = _mm_permute_ps::<MASK>(a);
22263        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22264    }
22265}
22266
22267/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22268///
22269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
22270#[inline]
22271#[target_feature(enable = "avx512f")]
22272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22273#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22274#[rustc_legacy_const_generics(1)]
22275pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
22276    unsafe {
22277        static_assert_uimm_bits!(MASK, 8);
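        // Each bit of MASK selects the low (0) or high (1) double of one
        // 128-bit lane; the +2, +4 and +6 offsets index into the higher lanes.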
22278        simd_shuffle!(
22279            a,
22280            a,
22281            [
22282                MASK as u32 & 0b1,
22283                ((MASK as u32 >> 1) & 0b1),
22284                ((MASK as u32 >> 2) & 0b1) + 2,
22285                ((MASK as u32 >> 3) & 0b1) + 2,
22286                ((MASK as u32 >> 4) & 0b1) + 4,
22287                ((MASK as u32 >> 5) & 0b1) + 4,
22288                ((MASK as u32 >> 6) & 0b1) + 6,
22289                ((MASK as u32 >> 7) & 0b1) + 6,
22290            ],
22291        )
22292    }
22293}
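
// An illustrative sketch (hypothetical helper): each bit of MASK picks the low
// (0) or high (1) double of its 128-bit lane for one destination position, so
// MASK = 0b01_01_01_01 swaps the two doubles of every lane.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_swap_pairs_pd(a: __m512d) -> __m512d {
    _mm512_permute_pd::<0b01_01_01_01>(a)
}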
22294
22295/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22296///
22297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
22298#[inline]
22299#[target_feature(enable = "avx512f")]
22300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22301#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22302#[rustc_legacy_const_generics(3)]
22303pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22304    unsafe {
22305        static_assert_uimm_bits!(MASK, 8);
22306        let r = _mm512_permute_pd::<MASK>(a);
22307        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22308    }
22309}
22310
22311/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22312///
22313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
22314#[inline]
22315#[target_feature(enable = "avx512f")]
22316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22317#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22318#[rustc_legacy_const_generics(2)]
22319pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22320    unsafe {
22321        static_assert_uimm_bits!(MASK, 8);
22322        let r = _mm512_permute_pd::<MASK>(a);
22323        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22324    }
22325}
22326
22327/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22328///
22329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
22330#[inline]
22331#[target_feature(enable = "avx512f,avx512vl")]
22332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22333#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22334#[rustc_legacy_const_generics(3)]
22335pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22336    unsafe {
22337        static_assert_uimm_bits!(MASK, 4);
22338        let r = _mm256_permute_pd::<MASK>(a);
22339        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22340    }
22341}
22342
22343/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22344///
22345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
22346#[inline]
22347#[target_feature(enable = "avx512f,avx512vl")]
22348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22349#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22350#[rustc_legacy_const_generics(2)]
22351pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22352    unsafe {
22353        static_assert_uimm_bits!(MASK, 4);
22354        let r = _mm256_permute_pd::<MASK>(a);
22355        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22356    }
22357}
22358
22359/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22360///
22361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
22362#[inline]
22363#[target_feature(enable = "avx512f,avx512vl")]
22364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22365#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22366#[rustc_legacy_const_generics(3)]
22367pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
22368    unsafe {
22369        static_assert_uimm_bits!(IMM2, 2);
22370        let r = _mm_permute_pd::<IMM2>(a);
22371        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22372    }
22373}
22374
22375/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22376///
22377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
22378#[inline]
22379#[target_feature(enable = "avx512f,avx512vl")]
22380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22381#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22382#[rustc_legacy_const_generics(2)]
22383pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
22384    unsafe {
22385        static_assert_uimm_bits!(IMM2, 2);
22386        let r = _mm_permute_pd::<IMM2>(a);
22387        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22388    }
22389}
22390
22391/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22392///
22393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
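///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context):
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// // Each 2-bit field of MASK indexes within a 256-bit half; 0b00_01_10_11
/// // reverses both halves, giving [3, 2, 1, 0, 7, 6, 5, 4].
/// let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);
/// ```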
22394#[inline]
22395#[target_feature(enable = "avx512f")]
22396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22397#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22398#[rustc_legacy_const_generics(1)]
22399pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
22400    unsafe {
22401        static_assert_uimm_bits!(MASK, 8);
22402        simd_shuffle!(
22403            a,
22404            a,
22405            [
22406                MASK as u32 & 0b11,
22407                (MASK as u32 >> 2) & 0b11,
22408                ((MASK as u32 >> 4) & 0b11),
22409                ((MASK as u32 >> 6) & 0b11),
22410                (MASK as u32 & 0b11) + 4,
22411                ((MASK as u32 >> 2) & 0b11) + 4,
22412                ((MASK as u32 >> 4) & 0b11) + 4,
22413                ((MASK as u32 >> 6) & 0b11) + 4,
22414            ],
22415        )
22416    }
22417}
22418
22419/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22420///
22421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
22422#[inline]
22423#[target_feature(enable = "avx512f")]
22424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22425#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22426#[rustc_legacy_const_generics(3)]
22427pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
22428    src: __m512i,
22429    k: __mmask8,
22430    a: __m512i,
22431) -> __m512i {
22432    unsafe {
22433        static_assert_uimm_bits!(MASK, 8);
22434        let r = _mm512_permutex_epi64::<MASK>(a);
22435        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
22436    }
22437}
22438
22439/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22440///
22441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
22442#[inline]
22443#[target_feature(enable = "avx512f")]
22444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22445#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22446#[rustc_legacy_const_generics(2)]
22447pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
22448    unsafe {
22449        static_assert_uimm_bits!(MASK, 8);
22450        let r = _mm512_permutex_epi64::<MASK>(a);
22451        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
22452    }
22453}
22454
22455/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22456///
22457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
22458#[inline]
22459#[target_feature(enable = "avx512f,avx512vl")]
22460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22461#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22462#[rustc_legacy_const_generics(1)]
22463pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
22464    unsafe {
22465        static_assert_uimm_bits!(MASK, 8);
22466        simd_shuffle!(
22467            a,
22468            a,
22469            [
22470                MASK as u32 & 0b11,
22471                (MASK as u32 >> 2) & 0b11,
22472                ((MASK as u32 >> 4) & 0b11),
22473                ((MASK as u32 >> 6) & 0b11),
22474            ],
22475        )
22476    }
22477}
22478
22479/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22480///
22481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
22482#[inline]
22483#[target_feature(enable = "avx512f,avx512vl")]
22484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22485#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22486#[rustc_legacy_const_generics(3)]
22487pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
22488    src: __m256i,
22489    k: __mmask8,
22490    a: __m256i,
22491) -> __m256i {
22492    unsafe {
22493        static_assert_uimm_bits!(MASK, 8);
22494        let r = _mm256_permutex_epi64::<MASK>(a);
22495        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
22496    }
22497}
22498
22499/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22500///
22501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
22502#[inline]
22503#[target_feature(enable = "avx512f,avx512vl")]
22504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22505#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22506#[rustc_legacy_const_generics(2)]
22507pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
22508    unsafe {
22509        static_assert_uimm_bits!(MASK, 8);
22510        let r = _mm256_permutex_epi64::<MASK>(a);
22511        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
22512    }
22513}
22514
22515/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22516///
22517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
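///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context):
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// // 0b00_01_10_11 reverses each 256-bit half: [3., 2., 1., 0., 7., 6., 5., 4.].
/// let r = _mm512_permutex_pd::<0b00_01_10_11>(a);
/// ```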
22518#[inline]
22519#[target_feature(enable = "avx512f")]
22520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22521#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22522#[rustc_legacy_const_generics(1)]
22523pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
22524    unsafe {
22525        static_assert_uimm_bits!(MASK, 8);
22526        simd_shuffle!(
22527            a,
22528            a,
22529            [
22530                MASK as u32 & 0b11,
22531                (MASK as u32 >> 2) & 0b11,
22532                ((MASK as u32 >> 4) & 0b11),
22533                ((MASK as u32 >> 6) & 0b11),
22534                (MASK as u32 & 0b11) + 4,
22535                ((MASK as u32 >> 2) & 0b11) + 4,
22536                ((MASK as u32 >> 4) & 0b11) + 4,
22537                ((MASK as u32 >> 6) & 0b11) + 4,
22538            ],
22539        )
22540    }
22541}
22542
22543/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22544///
22545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
22546#[inline]
22547#[target_feature(enable = "avx512f")]
22548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22549#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22550#[rustc_legacy_const_generics(3)]
22551pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22552    unsafe {
22553        let r = _mm512_permutex_pd::<MASK>(a);
22554        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22555    }
22556}
22557
22558/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22559///
22560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
22561#[inline]
22562#[target_feature(enable = "avx512f")]
22563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22564#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22565#[rustc_legacy_const_generics(2)]
22566pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22567    unsafe {
22568        let r = _mm512_permutex_pd::<MASK>(a);
22569        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22570    }
22571}
22572
22573/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22574///
22575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
22576#[inline]
22577#[target_feature(enable = "avx512f,avx512vl")]
22578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22579#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22580#[rustc_legacy_const_generics(1)]
22581pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
22582    unsafe {
22583        static_assert_uimm_bits!(MASK, 8);
22584        simd_shuffle!(
22585            a,
22586            a,
22587            [
22588                MASK as u32 & 0b11,
22589                (MASK as u32 >> 2) & 0b11,
22590                ((MASK as u32 >> 4) & 0b11),
22591                ((MASK as u32 >> 6) & 0b11),
22592            ],
22593        )
22594    }
22595}
22596
22597/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22598///
22599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
22600#[inline]
22601#[target_feature(enable = "avx512f,avx512vl")]
22602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22603#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22604#[rustc_legacy_const_generics(3)]
22605pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22606    unsafe {
22607        static_assert_uimm_bits!(MASK, 8);
22608        let r = _mm256_permutex_pd::<MASK>(a);
22609        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22610    }
22611}
22612
22613/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22614///
22615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
22616#[inline]
22617#[target_feature(enable = "avx512f,avx512vl")]
22618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22619#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22620#[rustc_legacy_const_generics(2)]
22621pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22622    unsafe {
22623        static_assert_uimm_bits!(MASK, 8);
22624        let r = _mm256_permutex_pd::<MASK>(a);
22625        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22626    }
22627}
22628
22629/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22630///
22631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
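///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context). Note that the index
/// vector is the first argument:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// // An all-15 index vector broadcasts the last element to every lane.
/// let idx = _mm512_set1_epi32(15);
/// let r = _mm512_permutevar_epi32(idx, a); // every element is 15
/// ```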
22632#[inline]
22633#[target_feature(enable = "avx512f")]
22634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22635#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22636pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22637    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22638}
22639
22640/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22641///
22642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
22643#[inline]
22644#[target_feature(enable = "avx512f")]
22645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22646#[cfg_attr(test, assert_instr(vpermd))]
22647pub fn _mm512_mask_permutevar_epi32(
22648    src: __m512i,
22649    k: __mmask16,
22650    idx: __m512i,
22651    a: __m512i,
22652) -> __m512i {
22653    unsafe {
22654        let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
22655        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22656    }
22657}
22658
22659/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22660///
22661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
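///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context):
///
/// ```ignore
/// let a = _mm512_setr_ps(
///     0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
/// );
/// // The low two bits of each 32-bit control select within the element's own
/// // 128-bit lane; an all-zero control broadcasts the first element of each
/// // lane: [0., 0., 0., 0., 4., 4., 4., 4., 8., 8., 8., 8., 12., 12., 12., 12.].
/// let b = _mm512_set1_epi32(0);
/// let r = _mm512_permutevar_ps(a, b);
/// ```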
22662#[inline]
22663#[target_feature(enable = "avx512f")]
22664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22665#[cfg_attr(test, assert_instr(vpermilps))]
22666pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
22667    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
22668}
22669
22670/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22671///
22672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
22673#[inline]
22674#[target_feature(enable = "avx512f")]
22675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22676#[cfg_attr(test, assert_instr(vpermilps))]
22677pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22678    unsafe {
22679        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22680        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22681    }
22682}
22683
22684/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22685///
22686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
22687#[inline]
22688#[target_feature(enable = "avx512f")]
22689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22690#[cfg_attr(test, assert_instr(vpermilps))]
22691pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22692    unsafe {
22693        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22694        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22695    }
22696}
22697
22698/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22699///
22700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
22701#[inline]
22702#[target_feature(enable = "avx512f,avx512vl")]
22703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22704#[cfg_attr(test, assert_instr(vpermilps))]
22705pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22706    unsafe {
22707        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22708        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
22709    }
22710}
22711
22712/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22713///
22714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
22715#[inline]
22716#[target_feature(enable = "avx512f,avx512vl")]
22717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22718#[cfg_attr(test, assert_instr(vpermilps))]
22719pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22720    unsafe {
22721        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22722        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22723    }
22724}
22725
22726/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22727///
22728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
22729#[inline]
22730#[target_feature(enable = "avx512f,avx512vl")]
22731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22732#[cfg_attr(test, assert_instr(vpermilps))]
22733pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22734    unsafe {
22735        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22736        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
22737    }
22738}
22739
22740/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22741///
22742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
22743#[inline]
22744#[target_feature(enable = "avx512f,avx512vl")]
22745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22746#[cfg_attr(test, assert_instr(vpermilps))]
22747pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22748    unsafe {
22749        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22750        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22751    }
22752}
22753
22754/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22755///
22756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
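///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context). For the variable
/// `vpermilpd` form, bit 1 (not bit 0) of each 64-bit control selects the
/// element within its 128-bit lane:
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// // An all-2 control (bit 1 set) picks the upper element of every lane:
/// // [1., 1., 3., 3., 5., 5., 7., 7.].
/// let b = _mm512_set1_epi64(2);
/// let r = _mm512_permutevar_pd(a, b);
/// ```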
22757#[inline]
22758#[target_feature(enable = "avx512f")]
22759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22760#[cfg_attr(test, assert_instr(vpermilpd))]
22761pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
22762    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
22763}
22764
22765/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22766///
22767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
22768#[inline]
22769#[target_feature(enable = "avx512f")]
22770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22771#[cfg_attr(test, assert_instr(vpermilpd))]
22772pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22773    unsafe {
22774        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22775        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
22776    }
22777}
22778
22779/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22780///
22781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
22782#[inline]
22783#[target_feature(enable = "avx512f")]
22784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22785#[cfg_attr(test, assert_instr(vpermilpd))]
22786pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22787    unsafe {
22788        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22789        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22790    }
22791}
22792
22793/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22794///
22795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
22796#[inline]
22797#[target_feature(enable = "avx512f,avx512vl")]
22798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22799#[cfg_attr(test, assert_instr(vpermilpd))]
22800pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22801    unsafe {
22802        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22803        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
22804    }
22805}
22806
22807/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22808///
22809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
22810#[inline]
22811#[target_feature(enable = "avx512f,avx512vl")]
22812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22813#[cfg_attr(test, assert_instr(vpermilpd))]
22814pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22815    unsafe {
22816        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22817        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22818    }
22819}
22820
22821/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22822///
22823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
22824#[inline]
22825#[target_feature(enable = "avx512f,avx512vl")]
22826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22827#[cfg_attr(test, assert_instr(vpermilpd))]
22828pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22829    unsafe {
22830        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22831        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
22832    }
22833}
22834
22835/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22836///
22837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
22838#[inline]
22839#[target_feature(enable = "avx512f,avx512vl")]
22840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22841#[cfg_attr(test, assert_instr(vpermilpd))]
22842pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22843    unsafe {
22844        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22845        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22846    }
22847}
22848
22849/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22850///
22851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
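///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
/// // Each output element takes a[idx[i] & 15], so this reverses the vector.
/// let r = _mm512_permutexvar_epi32(idx, a);
/// ```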
22852#[inline]
22853#[target_feature(enable = "avx512f")]
22854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22855#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22856pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22857    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22858}
22859
22860/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22861///
22862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
22863#[inline]
22864#[target_feature(enable = "avx512f")]
22865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22866#[cfg_attr(test, assert_instr(vpermd))]
22867pub fn _mm512_mask_permutexvar_epi32(
22868    src: __m512i,
22869    k: __mmask16,
22870    idx: __m512i,
22871    a: __m512i,
22872) -> __m512i {
22873    unsafe {
22874        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22875        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22876    }
22877}
22878
22879/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22880///
22881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
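///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let idx = _mm512_set1_epi32(3);
/// // Only the low eight mask bits are set, so the upper eight lanes are zeroed:
/// // [3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0].
/// let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
/// ```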
22882#[inline]
22883#[target_feature(enable = "avx512f")]
22884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22885#[cfg_attr(test, assert_instr(vpermd))]
22886pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
22887    unsafe {
22888        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22889        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
22890    }
22891}
22892
22893/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22894///
22895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
22896#[inline]
22897#[target_feature(enable = "avx512f,avx512vl")]
22898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22899#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22900pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
22901    _mm256_permutevar8x32_epi32(a, idx) // LLVM uses llvm.x86.avx2.permd
22902}
22903
22904/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22905///
22906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
22907#[inline]
22908#[target_feature(enable = "avx512f,avx512vl")]
22909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22910#[cfg_attr(test, assert_instr(vpermd))]
22911pub fn _mm256_mask_permutexvar_epi32(
22912    src: __m256i,
22913    k: __mmask8,
22914    idx: __m256i,
22915    a: __m256i,
22916) -> __m256i {
22917    unsafe {
22918        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22919        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
22920    }
22921}
22922
22923/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22924///
22925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
22926#[inline]
22927#[target_feature(enable = "avx512f,avx512vl")]
22928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22929#[cfg_attr(test, assert_instr(vpermd))]
22930pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22931    unsafe {
22932        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22933        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
22934    }
22935}
22936
22937/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22938///
22939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
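///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context):
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
/// // Each output element takes a[idx[i] & 7], so this reverses the vector.
/// let r = _mm512_permutexvar_epi64(idx, a);
/// ```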
22940#[inline]
22941#[target_feature(enable = "avx512f")]
22942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22943#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22944pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
22945    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
22946}
22947
22948/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22949///
22950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
22951#[inline]
22952#[target_feature(enable = "avx512f")]
22953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22954#[cfg_attr(test, assert_instr(vpermq))]
22955pub fn _mm512_mask_permutexvar_epi64(
22956    src: __m512i,
22957    k: __mmask8,
22958    idx: __m512i,
22959    a: __m512i,
22960) -> __m512i {
22961    unsafe {
22962        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22963        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
22964    }
22965}
22966
22967/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22968///
22969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
22970#[inline]
22971#[target_feature(enable = "avx512f")]
22972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22973#[cfg_attr(test, assert_instr(vpermq))]
22974pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
22975    unsafe {
22976        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22977        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22978    }
22979}
22980
22981/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22982///
22983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
22984#[inline]
22985#[target_feature(enable = "avx512f,avx512vl")]
22986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22987#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22988pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
22989    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
22990}
22991
22992/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22993///
22994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
22995#[inline]
22996#[target_feature(enable = "avx512f,avx512vl")]
22997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22998#[cfg_attr(test, assert_instr(vpermq))]
22999pub fn _mm256_mask_permutexvar_epi64(
23000    src: __m256i,
23001    k: __mmask8,
23002    idx: __m256i,
23003    a: __m256i,
23004) -> __m256i {
23005    unsafe {
23006        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23007        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
23008    }
23009}
23010
23011/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23012///
23013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
23014#[inline]
23015#[target_feature(enable = "avx512f,avx512vl")]
23016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23017#[cfg_attr(test, assert_instr(vpermq))]
23018pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23019    unsafe {
23020        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23021        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23022    }
23023}
23024
23025/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23026///
23027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
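///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context):
///
/// ```ignore
/// let a = _mm512_setr_ps(
///     0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
/// );
/// // Unlike `_mm512_permutevar_ps`, the index reaches across the whole vector;
/// // an all-7 index broadcasts element 7 to every lane.
/// let idx = _mm512_set1_epi32(7);
/// let r = _mm512_permutexvar_ps(idx, a); // every element is 7.0
/// ```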
23028#[inline]
23029#[target_feature(enable = "avx512f")]
23030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23031#[cfg_attr(test, assert_instr(vpermps))]
23032pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
23033    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
23034}
23035
23036/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23037///
23038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
23039#[inline]
23040#[target_feature(enable = "avx512f")]
23041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23042#[cfg_attr(test, assert_instr(vpermps))]
23043pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23044    unsafe {
23045        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
23046        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
23047    }
23048}
23049
23050/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23051///
23052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
23053#[inline]
23054#[target_feature(enable = "avx512f")]
23055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23056#[cfg_attr(test, assert_instr(vpermps))]
23057pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23058    unsafe {
23059        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
23060        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23061    }
23062}
23063
23064/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23065///
23066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
23067#[inline]
23068#[target_feature(enable = "avx512f,avx512vl")]
23069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23070#[cfg_attr(test, assert_instr(vpermps))]
23071pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
23072    _mm256_permutevar8x32_ps(a, idx) // LLVM uses llvm.x86.avx2.permps
23073}
23074
23075/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23076///
23077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23078#[inline]
23079#[target_feature(enable = "avx512f,avx512vl")]
23080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23081#[cfg_attr(test, assert_instr(vpermps))]
23082pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23083    unsafe {
23084        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23085        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23086    }
23087}
23088
23089/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23090///
23091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23092#[inline]
23093#[target_feature(enable = "avx512f,avx512vl")]
23094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23095#[cfg_attr(test, assert_instr(vpermps))]
23096pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23097    unsafe {
23098        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23099        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23100    }
23101}
23102
23103/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23104///
23105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
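///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context):
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// let idx = _mm512_setr_epi64(3, 3, 2, 2, 1, 1, 0, 0);
/// // dst[i] = a[idx[i] & 7], so r = [3., 3., 2., 2., 1., 1., 0., 0.].
/// let r = _mm512_permutexvar_pd(idx, a);
/// ```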
23106#[inline]
23107#[target_feature(enable = "avx512f")]
23108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23109#[cfg_attr(test, assert_instr(vpermpd))]
23110pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
23111    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23112}
23113
23114/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23115///
23116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23117#[inline]
23118#[target_feature(enable = "avx512f")]
23119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23120#[cfg_attr(test, assert_instr(vpermpd))]
23121pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23122    unsafe {
23123        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23124        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23125    }
23126}
23127
23128/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23129///
23130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23131#[inline]
23132#[target_feature(enable = "avx512f")]
23133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23134#[cfg_attr(test, assert_instr(vpermpd))]
23135pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23136    unsafe {
23137        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23138        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23139    }
23140}
23141
23142/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23143///
23144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23145#[inline]
23146#[target_feature(enable = "avx512f,avx512vl")]
23147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23148#[cfg_attr(test, assert_instr(vpermpd))]
23149pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
23150    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23151}
23152
23153/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23154///
23155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23156#[inline]
23157#[target_feature(enable = "avx512f,avx512vl")]
23158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23159#[cfg_attr(test, assert_instr(vpermpd))]
23160pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23161    unsafe {
23162        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23163        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23164    }
23165}
23166
23167/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23168///
23169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23170#[inline]
23171#[target_feature(enable = "avx512f,avx512vl")]
23172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23173#[cfg_attr(test, assert_instr(vpermpd))]
23174pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23175    unsafe {
23176        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23177        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23178    }
23179}
23180
23181/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23182///
23183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
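///
/// # Examples
///
/// A minimal usage sketch (illustrative only; assumes an
/// `#[target_feature(enable = "avx512f")]` context). Bit 4 of each index
/// selects between `a` (clear) and `b` (set); the low four bits pick the
/// element within the selected vector:
///
/// ```ignore
/// let a = _mm512_set1_epi32(10);
/// let b = _mm512_set1_epi32(20);
/// // Indices 0..=15 read from `a`, 16..=31 read from `b`.
/// let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
/// let r = _mm512_permutex2var_epi32(a, idx, b); // alternates 10, 20, 10, 20, ...
/// ```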
23184#[inline]
23185#[target_feature(enable = "avx512f")]
23186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23187#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23188pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23189    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23190}
23191
23192/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23193///
23194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23195#[inline]
23196#[target_feature(enable = "avx512f")]
23197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23198#[cfg_attr(test, assert_instr(vpermt2d))]
23199pub fn _mm512_mask_permutex2var_epi32(
23200    a: __m512i,
23201    k: __mmask16,
23202    idx: __m512i,
23203    b: __m512i,
23204) -> __m512i {
23205    unsafe {
23206        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23207        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23208    }
23209}
23210
23211/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23212///
23213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23214#[inline]
23215#[target_feature(enable = "avx512f")]
23216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23217#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23218pub fn _mm512_maskz_permutex2var_epi32(
23219    k: __mmask16,
23220    a: __m512i,
23221    idx: __m512i,
23222    b: __m512i,
23223) -> __m512i {
23224    unsafe {
23225        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23226        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23227    }
23228}
23229
23230/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23231///
23232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23233#[inline]
23234#[target_feature(enable = "avx512f")]
23235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23236#[cfg_attr(test, assert_instr(vpermi2d))]
23237pub fn _mm512_mask2_permutex2var_epi32(
23238    a: __m512i,
23239    idx: __m512i,
23240    k: __mmask16,
23241    b: __m512i,
23242) -> __m512i {
23243    unsafe {
23244        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23245        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
23246    }
23247}
23248
23249/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23250///
23251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23252#[inline]
23253#[target_feature(enable = "avx512f,avx512vl")]
23254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23255#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23256pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23257    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23258}
23259
23260/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23261///
23262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23263#[inline]
23264#[target_feature(enable = "avx512f,avx512vl")]
23265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23266#[cfg_attr(test, assert_instr(vpermt2d))]
23267pub fn _mm256_mask_permutex2var_epi32(
23268    a: __m256i,
23269    k: __mmask8,
23270    idx: __m256i,
23271    b: __m256i,
23272) -> __m256i {
23273    unsafe {
23274        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23275        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23276    }
23277}
23278
23279/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23280///
23281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23282#[inline]
23283#[target_feature(enable = "avx512f,avx512vl")]
23284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23285#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23286pub fn _mm256_maskz_permutex2var_epi32(
23287    k: __mmask8,
23288    a: __m256i,
23289    idx: __m256i,
23290    b: __m256i,
23291) -> __m256i {
23292    unsafe {
23293        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23294        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23295    }
23296}
23297
23298/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23299///
23300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23301#[inline]
23302#[target_feature(enable = "avx512f,avx512vl")]
23303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23304#[cfg_attr(test, assert_instr(vpermi2d))]
23305pub fn _mm256_mask2_permutex2var_epi32(
23306    a: __m256i,
23307    idx: __m256i,
23308    k: __mmask8,
23309    b: __m256i,
23310) -> __m256i {
23311    unsafe {
23312        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23313        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23314    }
23315}
23316
23317/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23318///
23319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23320#[inline]
23321#[target_feature(enable = "avx512f,avx512vl")]
23322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23323#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23324pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23325    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23326}
23327
23328/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23329///
23330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
23331#[inline]
23332#[target_feature(enable = "avx512f,avx512vl")]
23333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23334#[cfg_attr(test, assert_instr(vpermt2d))]
23335pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23336    unsafe {
23337        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23338        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
23339    }
23340}
23341
23342/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23343///
23344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
23345#[inline]
23346#[target_feature(enable = "avx512f,avx512vl")]
23347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23348#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23349pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23350    unsafe {
23351        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23352        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
23353    }
23354}
23355
23356/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23357///
23358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
23359#[inline]
23360#[target_feature(enable = "avx512f,avx512vl")]
23361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23362#[cfg_attr(test, assert_instr(vpermi2d))]
23363pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23364    unsafe {
23365        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23366        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
23367    }
23368}
23369
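// Illustrative sketch, not part of the upstream source: one possible use of
// `_mm512_permutex2var_epi32`. Each `idx` lane selects from the 32-element
// concatenation of `a` (indices 0..=15) and `b` (indices 16..=31); here the
// low halves of `a` and `b` are interleaved lane by lane.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_permutex2var_epi32_interleave(a: __m512i, b: __m512i) -> __m512i {
    // dst = [a0, b0, a1, b1, ..., a7, b7]
    let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
    _mm512_permutex2var_epi32(a, idx, b)
}
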
23370/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23371///
23372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
23373#[inline]
23374#[target_feature(enable = "avx512f")]
23375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23376#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23377pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23378    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
23379}
23380
23381/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23382///
23383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
23384#[inline]
23385#[target_feature(enable = "avx512f")]
23386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23387#[cfg_attr(test, assert_instr(vpermt2q))]
23388pub fn _mm512_mask_permutex2var_epi64(
23389    a: __m512i,
23390    k: __mmask8,
23391    idx: __m512i,
23392    b: __m512i,
23393) -> __m512i {
23394    unsafe {
23395        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23396        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
23397    }
23398}
23399
23400/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23401///
23402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
23403#[inline]
23404#[target_feature(enable = "avx512f")]
23405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23406#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23407pub fn _mm512_maskz_permutex2var_epi64(
23408    k: __mmask8,
23409    a: __m512i,
23410    idx: __m512i,
23411    b: __m512i,
23412) -> __m512i {
23413    unsafe {
23414        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23415        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23416    }
23417}
23418
23419/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23420///
23421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
23422#[inline]
23423#[target_feature(enable = "avx512f")]
23424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23425#[cfg_attr(test, assert_instr(vpermi2q))]
23426pub fn _mm512_mask2_permutex2var_epi64(
23427    a: __m512i,
23428    idx: __m512i,
23429    k: __mmask8,
23430    b: __m512i,
23431) -> __m512i {
23432    unsafe {
23433        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23434        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
23435    }
23436}
23437
23438/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23439///
23440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
23441#[inline]
23442#[target_feature(enable = "avx512f,avx512vl")]
23443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23444#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23445pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23446    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
23447}
23448
23449/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23450///
23451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
23452#[inline]
23453#[target_feature(enable = "avx512f,avx512vl")]
23454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23455#[cfg_attr(test, assert_instr(vpermt2q))]
23456pub fn _mm256_mask_permutex2var_epi64(
23457    a: __m256i,
23458    k: __mmask8,
23459    idx: __m256i,
23460    b: __m256i,
23461) -> __m256i {
23462    unsafe {
23463        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23464        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
23465    }
23466}
23467
23468/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23469///
23470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
23471#[inline]
23472#[target_feature(enable = "avx512f,avx512vl")]
23473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23474#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23475pub fn _mm256_maskz_permutex2var_epi64(
23476    k: __mmask8,
23477    a: __m256i,
23478    idx: __m256i,
23479    b: __m256i,
23480) -> __m256i {
23481    unsafe {
23482        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23483        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23484    }
23485}
23486
23487/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23488///
23489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
23490#[inline]
23491#[target_feature(enable = "avx512f,avx512vl")]
23492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23493#[cfg_attr(test, assert_instr(vpermi2q))]
23494pub fn _mm256_mask2_permutex2var_epi64(
23495    a: __m256i,
23496    idx: __m256i,
23497    k: __mmask8,
23498    b: __m256i,
23499) -> __m256i {
23500    unsafe {
23501        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23502        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
23503    }
23504}
23505
23506/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23507///
23508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
23509#[inline]
23510#[target_feature(enable = "avx512f,avx512vl")]
23511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23512#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23513pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23514    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
23515}
23516
23517/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23518///
23519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
23520#[inline]
23521#[target_feature(enable = "avx512f,avx512vl")]
23522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23523#[cfg_attr(test, assert_instr(vpermt2q))]
23524pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23525    unsafe {
23526        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23527        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
23528    }
23529}
23530
23531/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23532///
23533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
23534#[inline]
23535#[target_feature(enable = "avx512f,avx512vl")]
23536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23537#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23538pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23539    unsafe {
23540        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23541        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
23542    }
23543}
23544
23545/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23546///
23547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
23548#[inline]
23549#[target_feature(enable = "avx512f,avx512vl")]
23550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23551#[cfg_attr(test, assert_instr(vpermi2q))]
23552pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23553    unsafe {
23554        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23555        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
23556    }
23557}
23558
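// Illustrative sketch, not part of the upstream source: for the 64-bit
// variants, `idx` lanes 0..=7 select from `a` and 8..=15 from `b`. The
// permutation below reverses `a`; with writemask `0b0000_1111` only result
// lanes 0..=3 take the reversed values, while lanes 4..=7 are copied from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_permutex2var_epi64(a: __m512i, b: __m512i) -> __m512i {
    let idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    _mm512_mask_permutex2var_epi64(a, 0b0000_1111, idx, b)
}
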
23559/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23560///
23561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
23562#[inline]
23563#[target_feature(enable = "avx512f")]
23564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23565#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23566pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
23567    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
23568}
23569
23570/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23571///
23572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
23573#[inline]
23574#[target_feature(enable = "avx512f")]
23575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23576#[cfg_attr(test, assert_instr(vpermt2ps))]
23577pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
23578    unsafe {
23579        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23580        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
23581    }
23582}
23583
23584/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23585///
23586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
23587#[inline]
23588#[target_feature(enable = "avx512f")]
23589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23590#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23591pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
23592    unsafe {
23593        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23594        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23595    }
23596}
23597
23598/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23599///
23600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
23601#[inline]
23602#[target_feature(enable = "avx512f")]
23603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23604#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23605pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
23606    unsafe {
23607        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23608        let idx = _mm512_castsi512_ps(idx).as_f32x16();
23609        transmute(simd_select_bitmask(k, permute, idx))
23610    }
23611}
23612
23613/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23614///
23615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
23616#[inline]
23617#[target_feature(enable = "avx512f,avx512vl")]
23618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23619#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23620pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
23621    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
23622}
23623
23624/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23625///
23626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
23627#[inline]
23628#[target_feature(enable = "avx512f,avx512vl")]
23629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23630#[cfg_attr(test, assert_instr(vpermt2ps))]
23631pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
23632    unsafe {
23633        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23634        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
23635    }
23636}
23637
23638/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23639///
23640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
23641#[inline]
23642#[target_feature(enable = "avx512f,avx512vl")]
23643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23644#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23645pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
23646    unsafe {
23647        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23648        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23649    }
23650}
23651
23652/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23653///
23654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
23655#[inline]
23656#[target_feature(enable = "avx512f,avx512vl")]
23657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23658#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23659pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
23660    unsafe {
23661        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23662        let idx = _mm256_castsi256_ps(idx).as_f32x8();
23663        transmute(simd_select_bitmask(k, permute, idx))
23664    }
23665}
23666
23667/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23668///
23669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
23670#[inline]
23671#[target_feature(enable = "avx512f,avx512vl")]
23672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23673#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23674pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
23675    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
23676}
23677
23678/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23679///
23680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
23681#[inline]
23682#[target_feature(enable = "avx512f,avx512vl")]
23683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23684#[cfg_attr(test, assert_instr(vpermt2ps))]
23685pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
23686    unsafe {
23687        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23688        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
23689    }
23690}
23691
23692/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23693///
23694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
23695#[inline]
23696#[target_feature(enable = "avx512f,avx512vl")]
23697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23698#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23699pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
23700    unsafe {
23701        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23702        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23703    }
23704}
23705
23706/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23707///
23708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
23709#[inline]
23710#[target_feature(enable = "avx512f,avx512vl")]
23711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23712#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23713pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
23714    unsafe {
23715        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23716        let idx = _mm_castsi128_ps(idx).as_f32x4();
23717        transmute(simd_select_bitmask(k, permute, idx))
23718    }
23719}
23720
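// Illustrative sketch, not part of the upstream source: gathers the even
// lanes of `a` followed by the even lanes of `b` (idx values 16..=31 select
// from `b`); the zeromask then keeps only the lower eight result lanes (the
// even lanes of `a`) and zeroes the rest.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_permutex2var_ps(a: __m512, b: __m512) -> __m512 {
    let idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
    _mm512_maskz_permutex2var_ps(0b0000_0000_1111_1111, a, idx, b)
}
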
23721/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23722///
23723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
23724#[inline]
23725#[target_feature(enable = "avx512f")]
23726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23727#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23728pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23729    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
23730}
23731
23732/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23733///
23734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
23735#[inline]
23736#[target_feature(enable = "avx512f")]
23737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23738#[cfg_attr(test, assert_instr(vpermt2pd))]
23739pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
23740    unsafe {
23741        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23742        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
23743    }
23744}
23745
23746/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23747///
23748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
23749#[inline]
23750#[target_feature(enable = "avx512f")]
23751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23752#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23753pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23754    unsafe {
23755        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23756        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23757    }
23758}
23759
23760/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23761///
23762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
23763#[inline]
23764#[target_feature(enable = "avx512f")]
23765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23766#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23767pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
23768    unsafe {
23769        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23770        let idx = _mm512_castsi512_pd(idx).as_f64x8();
23771        transmute(simd_select_bitmask(k, permute, idx))
23772    }
23773}
23774
23775/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23776///
23777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
23778#[inline]
23779#[target_feature(enable = "avx512f,avx512vl")]
23780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23781#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23782pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23783    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
23784}
23785
23786/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23787///
23788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
23789#[inline]
23790#[target_feature(enable = "avx512f,avx512vl")]
23791#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23792#[cfg_attr(test, assert_instr(vpermt2pd))]
23793pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
23794    unsafe {
23795        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23796        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
23797    }
23798}
23799
23800/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23801///
23802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
23803#[inline]
23804#[target_feature(enable = "avx512f,avx512vl")]
23805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23806#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23807pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23808    unsafe {
23809        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23810        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23811    }
23812}
23813
23814/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23815///
23816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
23817#[inline]
23818#[target_feature(enable = "avx512f,avx512vl")]
23819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23820#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23821pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
23822    unsafe {
23823        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23824        let idx = _mm256_castsi256_pd(idx).as_f64x4();
23825        transmute(simd_select_bitmask(k, permute, idx))
23826    }
23827}
23828
23829/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23830///
23831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
23832#[inline]
23833#[target_feature(enable = "avx512f,avx512vl")]
23834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23835#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23836pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23837    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
23838}
23839
23840/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23841///
23842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
23843#[inline]
23844#[target_feature(enable = "avx512f,avx512vl")]
23845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23846#[cfg_attr(test, assert_instr(vpermt2pd))]
23847pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
23848    unsafe {
23849        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23850        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
23851    }
23852}
23853
23854/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23855///
23856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
23857#[inline]
23858#[target_feature(enable = "avx512f,avx512vl")]
23859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23860#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23861pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23862    unsafe {
23863        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23864        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23865    }
23866}
23867
23868/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23869///
23870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
23871#[inline]
23872#[target_feature(enable = "avx512f,avx512vl")]
23873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23874#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23875pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
23876    unsafe {
23877        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23878        let idx = _mm_castsi128_pd(idx).as_f64x2();
23879        transmute(simd_select_bitmask(k, permute, idx))
23880    }
23881}
23882
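// Illustrative sketch, not part of the upstream source: the `mask2` form
// writes the permuted value where a mask bit is set and otherwise keeps the
// corresponding `idx` lane, reinterpreted as an f64 bit pattern.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask2_permutex2var_pd(a: __m512d, b: __m512d) -> __m512d {
    // idx lanes 0..=7 select from `a`, 8..=15 select from `b`; only result
    // lanes 4..=7 are permuted here, lanes 0..=3 keep the idx bit patterns.
    let idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11);
    _mm512_mask2_permutex2var_pd(a, idx, 0b1111_0000, b)
}
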
23883/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
23884///
23885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
23886#[inline]
23887#[target_feature(enable = "avx512f")]
23888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23889#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
23890#[rustc_legacy_const_generics(1)]
23891pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
23892    unsafe {
23893        static_assert_uimm_bits!(MASK, 8);
23894        let r: i32x16 = simd_shuffle!(
23895            a.as_i32x16(),
23896            a.as_i32x16(),
23897            [
23898                MASK as u32 & 0b11,
23899                (MASK as u32 >> 2) & 0b11,
23900                (MASK as u32 >> 4) & 0b11,
23901                (MASK as u32 >> 6) & 0b11,
23902                (MASK as u32 & 0b11) + 4,
23903                ((MASK as u32 >> 2) & 0b11) + 4,
23904                ((MASK as u32 >> 4) & 0b11) + 4,
23905                ((MASK as u32 >> 6) & 0b11) + 4,
23906                (MASK as u32 & 0b11) + 8,
23907                ((MASK as u32 >> 2) & 0b11) + 8,
23908                ((MASK as u32 >> 4) & 0b11) + 8,
23909                ((MASK as u32 >> 6) & 0b11) + 8,
23910                (MASK as u32 & 0b11) + 12,
23911                ((MASK as u32 >> 2) & 0b11) + 12,
23912                ((MASK as u32 >> 4) & 0b11) + 12,
23913                ((MASK as u32 >> 6) & 0b11) + 12,
23914            ],
23915        );
23916        transmute(r)
23917    }
23918}
23919
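// Illustrative sketch, not part of the upstream source: `_MM_PERM_BADC`
// (0b01_00_11_10) swaps the two 64-bit halves of every 128-bit lane, i.e.
// each lane [e0, e1, e2, e3] becomes [e2, e3, e0, e1].
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_shuffle_epi32_swap_halves(a: __m512i) -> __m512i {
    _mm512_shuffle_epi32::<_MM_PERM_BADC>(a)
}
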
23920/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23921///
23922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
23923#[inline]
23924#[target_feature(enable = "avx512f")]
23925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23926#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23927#[rustc_legacy_const_generics(3)]
23928pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23929    src: __m512i,
23930    k: __mmask16,
23931    a: __m512i,
23932) -> __m512i {
23933    unsafe {
23934        static_assert_uimm_bits!(MASK, 8);
23935        let r = _mm512_shuffle_epi32::<MASK>(a);
23936        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23937    }
23938}
23939
23940/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23941///
23942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
23943#[inline]
23944#[target_feature(enable = "avx512f")]
23945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23946#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23947#[rustc_legacy_const_generics(2)]
23948pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
23949    unsafe {
23950        static_assert_uimm_bits!(MASK, 8);
23951        let r = _mm512_shuffle_epi32::<MASK>(a);
23952        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23953    }
23954}
23955
23956/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23957///
23958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
23959#[inline]
23960#[target_feature(enable = "avx512f,avx512vl")]
23961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23962#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23963#[rustc_legacy_const_generics(3)]
23964pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23965    src: __m256i,
23966    k: __mmask8,
23967    a: __m256i,
23968) -> __m256i {
23969    unsafe {
23970        static_assert_uimm_bits!(MASK, 8);
23971        let r = _mm256_shuffle_epi32::<MASK>(a);
23972        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23973    }
23974}
23975
23976/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23977///
23978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
23979#[inline]
23980#[target_feature(enable = "avx512f,avx512vl")]
23981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23982#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23983#[rustc_legacy_const_generics(2)]
23984pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
23985    unsafe {
23986        static_assert_uimm_bits!(MASK, 8);
23987        let r = _mm256_shuffle_epi32::<MASK>(a);
23988        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23989    }
23990}
23991
23992/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23993///
23994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
23995#[inline]
23996#[target_feature(enable = "avx512f,avx512vl")]
23997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23998#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23999#[rustc_legacy_const_generics(3)]
24000pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24001    src: __m128i,
24002    k: __mmask8,
24003    a: __m128i,
24004) -> __m128i {
24005    unsafe {
24006        static_assert_uimm_bits!(MASK, 8);
24007        let r = _mm_shuffle_epi32::<MASK>(a);
24008        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
24009    }
24010}
24011
24012/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24013///
24014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
24015#[inline]
24016#[target_feature(enable = "avx512f,avx512vl")]
24017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24018#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24019#[rustc_legacy_const_generics(2)]
24020pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
24021    unsafe {
24022        static_assert_uimm_bits!(MASK, 8);
24023        let r = _mm_shuffle_epi32::<MASK>(a);
24024        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
24025    }
24026}
24027
24028/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
24029///
24030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
24031#[inline]
24032#[target_feature(enable = "avx512f")]
24033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24034#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24035#[rustc_legacy_const_generics(2)]
24036pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24037    unsafe {
24038        static_assert_uimm_bits!(MASK, 8);
24039        simd_shuffle!(
24040            a,
24041            b,
24042            [
24043                MASK as u32 & 0b11,
24044                (MASK as u32 >> 2) & 0b11,
24045                ((MASK as u32 >> 4) & 0b11) + 16,
24046                ((MASK as u32 >> 6) & 0b11) + 16,
24047                (MASK as u32 & 0b11) + 4,
24048                ((MASK as u32 >> 2) & 0b11) + 4,
24049                ((MASK as u32 >> 4) & 0b11) + 20,
24050                ((MASK as u32 >> 6) & 0b11) + 20,
24051                (MASK as u32 & 0b11) + 8,
24052                ((MASK as u32 >> 2) & 0b11) + 8,
24053                ((MASK as u32 >> 4) & 0b11) + 24,
24054                ((MASK as u32 >> 6) & 0b11) + 24,
24055                (MASK as u32 & 0b11) + 12,
24056                ((MASK as u32 >> 2) & 0b11) + 12,
24057                ((MASK as u32 >> 4) & 0b11) + 28,
24058                ((MASK as u32 >> 6) & 0b11) + 28,
24059            ],
24060        )
24061    }
24062}
24063
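// Illustrative sketch, not part of the upstream source: within every 128-bit
// lane the low two result elements are selected from `a` and the high two
// from `b`, using the usual `_MM_SHUFFLE(b_hi, b_lo, a_hi, a_lo)` encoding.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_shuffle_ps(a: __m512, b: __m512) -> __m512 {
    // Per 128-bit lane: [a2, a3, b0, b1].
    _mm512_shuffle_ps::<{ _MM_SHUFFLE(1, 0, 3, 2) }>(a, b)
}
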
24064/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24065///
24066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
24067#[inline]
24068#[target_feature(enable = "avx512f")]
24069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24070#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24071#[rustc_legacy_const_generics(4)]
24072pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
24073    src: __m512,
24074    k: __mmask16,
24075    a: __m512,
24076    b: __m512,
24077) -> __m512 {
24078    unsafe {
24079        static_assert_uimm_bits!(MASK, 8);
24080        let r = _mm512_shuffle_ps::<MASK>(a, b);
24081        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24082    }
24083}
24084
24085/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24086///
24087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24088#[inline]
24089#[target_feature(enable = "avx512f")]
24090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24091#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24092#[rustc_legacy_const_generics(3)]
24093pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24094    unsafe {
24095        static_assert_uimm_bits!(MASK, 8);
24096        let r = _mm512_shuffle_ps::<MASK>(a, b);
24097        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24098    }
24099}
24100
24101/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24102///
24103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24104#[inline]
24105#[target_feature(enable = "avx512f,avx512vl")]
24106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24107#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24108#[rustc_legacy_const_generics(4)]
24109pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
24110    src: __m256,
24111    k: __mmask8,
24112    a: __m256,
24113    b: __m256,
24114) -> __m256 {
24115    unsafe {
24116        static_assert_uimm_bits!(MASK, 8);
24117        let r = _mm256_shuffle_ps::<MASK>(a, b);
24118        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24119    }
24120}
24121
24122/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24123///
24124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24125#[inline]
24126#[target_feature(enable = "avx512f,avx512vl")]
24127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24128#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24129#[rustc_legacy_const_generics(3)]
24130pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24131    unsafe {
24132        static_assert_uimm_bits!(MASK, 8);
24133        let r = _mm256_shuffle_ps::<MASK>(a, b);
24134        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24135    }
24136}
24137
24138/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24139///
24140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24141#[inline]
24142#[target_feature(enable = "avx512f,avx512vl")]
24143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24144#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24145#[rustc_legacy_const_generics(4)]
24146pub fn _mm_mask_shuffle_ps<const MASK: i32>(
24147    src: __m128,
24148    k: __mmask8,
24149    a: __m128,
24150    b: __m128,
24151) -> __m128 {
24152    unsafe {
24153        static_assert_uimm_bits!(MASK, 8);
24154        let r = _mm_shuffle_ps::<MASK>(a, b);
24155        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24156    }
24157}
24158
24159/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24160///
24161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24162#[inline]
24163#[target_feature(enable = "avx512f,avx512vl")]
24164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24165#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24166#[rustc_legacy_const_generics(3)]
24167pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24168    unsafe {
24169        static_assert_uimm_bits!(MASK, 8);
24170        let r = _mm_shuffle_ps::<MASK>(a, b);
24171        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24172    }
24173}
24174
24175/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24176///
24177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
24178#[inline]
24179#[target_feature(enable = "avx512f")]
24180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24181#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24182#[rustc_legacy_const_generics(2)]
24183pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24184    unsafe {
24185        static_assert_uimm_bits!(MASK, 8);
24186        simd_shuffle!(
24187            a,
24188            b,
24189            [
24190                MASK as u32 & 0b1,
24191                ((MASK as u32 >> 1) & 0b1) + 8,
24192                ((MASK as u32 >> 2) & 0b1) + 2,
24193                ((MASK as u32 >> 3) & 0b1) + 10,
24194                ((MASK as u32 >> 4) & 0b1) + 4,
24195                ((MASK as u32 >> 5) & 0b1) + 12,
24196                ((MASK as u32 >> 6) & 0b1) + 6,
24197                ((MASK as u32 >> 7) & 0b1) + 14,
24198            ],
24199        )
24200    }
24201}
24202
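// Illustrative sketch, not part of the upstream source: each bit pair of the
// control picks, per 128-bit lane, which element of `a` (low bit) and of `b`
// (high bit) is written; a control of zero keeps the even elements of both.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_shuffle_pd(a: __m512d, b: __m512d) -> __m512d {
    // Per 128-bit lane: [a_low, b_low].
    _mm512_shuffle_pd::<0b0000_0000>(a, b)
}
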
24203/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24204///
24205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24206#[inline]
24207#[target_feature(enable = "avx512f")]
24208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24209#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24210#[rustc_legacy_const_generics(4)]
24211pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
24212    src: __m512d,
24213    k: __mmask8,
24214    a: __m512d,
24215    b: __m512d,
24216) -> __m512d {
24217    unsafe {
24218        static_assert_uimm_bits!(MASK, 8);
24219        let r = _mm512_shuffle_pd::<MASK>(a, b);
24220        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24221    }
24222}
24223
24224/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24225///
24226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
24227#[inline]
24228#[target_feature(enable = "avx512f")]
24229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24230#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24231#[rustc_legacy_const_generics(3)]
24232pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24233    unsafe {
24234        static_assert_uimm_bits!(MASK, 8);
24235        let r = _mm512_shuffle_pd::<MASK>(a, b);
24236        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24237    }
24238}
24239
24240/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24241///
24242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
24243#[inline]
24244#[target_feature(enable = "avx512f,avx512vl")]
24245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24246#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24247#[rustc_legacy_const_generics(4)]
24248pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
24249    src: __m256d,
24250    k: __mmask8,
24251    a: __m256d,
24252    b: __m256d,
24253) -> __m256d {
24254    unsafe {
24255        static_assert_uimm_bits!(MASK, 8);
24256        let r = _mm256_shuffle_pd::<MASK>(a, b);
24257        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24258    }
24259}
24260
24261/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24262///
24263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
24264#[inline]
24265#[target_feature(enable = "avx512f,avx512vl")]
24266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24267#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24268#[rustc_legacy_const_generics(3)]
24269pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24270    unsafe {
24271        static_assert_uimm_bits!(MASK, 8);
24272        let r = _mm256_shuffle_pd::<MASK>(a, b);
24273        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24274    }
24275}
24276
24277/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24278///
24279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
24280#[inline]
24281#[target_feature(enable = "avx512f,avx512vl")]
24282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24283#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24284#[rustc_legacy_const_generics(4)]
24285pub fn _mm_mask_shuffle_pd<const MASK: i32>(
24286    src: __m128d,
24287    k: __mmask8,
24288    a: __m128d,
24289    b: __m128d,
24290) -> __m128d {
24291    unsafe {
24292        static_assert_uimm_bits!(MASK, 8);
24293        let r = _mm_shuffle_pd::<MASK>(a, b);
24294        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
24295    }
24296}
24297
24298/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24299///
24300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
24301#[inline]
24302#[target_feature(enable = "avx512f,avx512vl")]
24303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24304#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24305#[rustc_legacy_const_generics(3)]
24306pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24307    unsafe {
24308        static_assert_uimm_bits!(MASK, 8);
24309        let r = _mm_shuffle_pd::<MASK>(a, b);
24310        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
24311    }
24312}
24313
24314/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24315///
24316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
24317#[inline]
24318#[target_feature(enable = "avx512f")]
24319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24320#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24321#[rustc_legacy_const_generics(2)]
24322pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24323    unsafe {
24324        static_assert_uimm_bits!(MASK, 8);
24325        let a = a.as_i32x16();
24326        let b = b.as_i32x16();
24327        let r: i32x16 = simd_shuffle!(
24328            a,
24329            b,
24330            [
24331                (MASK as u32 & 0b11) * 4 + 0,
24332                (MASK as u32 & 0b11) * 4 + 1,
24333                (MASK as u32 & 0b11) * 4 + 2,
24334                (MASK as u32 & 0b11) * 4 + 3,
24335                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24336                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24337                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24338                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24339                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24340                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24341                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24342                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24343                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24344                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24345                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24346                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24347            ],
24348        );
24349        transmute(r)
24350    }
24351}
24352
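// Illustrative sketch, not part of the upstream source: with this control the
// two low 128-bit chunks of the result come from `a` and the two high chunks
// from `b`, each kept in its original position.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_shuffle_i32x4(a: __m512i, b: __m512i) -> __m512i {
    // chunk0 = a.chunk0, chunk1 = a.chunk1, chunk2 = b.chunk2, chunk3 = b.chunk3
    _mm512_shuffle_i32x4::<0b11_10_01_00>(a, b)
}
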
24353/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24354///
24355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
24356#[inline]
24357#[target_feature(enable = "avx512f")]
24358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24359#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24360#[rustc_legacy_const_generics(4)]
24361pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
24362    src: __m512i,
24363    k: __mmask16,
24364    a: __m512i,
24365    b: __m512i,
24366) -> __m512i {
24367    unsafe {
24368        static_assert_uimm_bits!(MASK, 8);
24369        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24370        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24371    }
24372}
24373
24374/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24375///
24376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
24377#[inline]
24378#[target_feature(enable = "avx512f")]
24379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24380#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24381#[rustc_legacy_const_generics(3)]
24382pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
24383    k: __mmask16,
24384    a: __m512i,
24385    b: __m512i,
24386) -> __m512i {
24387    unsafe {
24388        static_assert_uimm_bits!(MASK, 8);
24389        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24390        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24391    }
24392}
24393
24394/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24395///
24396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
24397#[inline]
24398#[target_feature(enable = "avx512f,avx512vl")]
24399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24400#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
24401#[rustc_legacy_const_generics(2)]
24402pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24403    unsafe {
24404        static_assert_uimm_bits!(MASK, 8);
24405        let a = a.as_i32x8();
24406        let b = b.as_i32x8();
24407        let r: i32x8 = simd_shuffle!(
24408            a,
24409            b,
24410            [
24411                (MASK as u32 & 0b1) * 4 + 0,
24412                (MASK as u32 & 0b1) * 4 + 1,
24413                (MASK as u32 & 0b1) * 4 + 2,
24414                (MASK as u32 & 0b1) * 4 + 3,
24415                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24416                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24417                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24418                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24419            ],
24420        );
24421        transmute(r)
24422    }
24423}
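
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): in the 256-bit form only the two low bits of MASK are used; bit 0
// selects the 128-bit lane taken from `a` and bit 1 the lane taken from `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn shuffle_i32x4_256_example() -> __m256i {
    let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm256_setr_epi32(8, 9, 10, 11, 12, 13, 14, 15);
    // MASK = 0b01: the result is the upper lane of `a` (4..=7) followed by
    // the lower lane of `b` (8..=11).
    _mm256_shuffle_i32x4::<0b01>(a, b)
}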

/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}

/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let r: i64x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 2 + 0,
                (MASK as u32 & 0b11) * 2 + 1,
                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
            ],
        );
        transmute(r)
    }
}
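
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): the 64-bit form uses the same 2-bit-per-lane encoding as
// _mm512_shuffle_i32x4, but each selected 128-bit lane holds two i64 values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn shuffle_i64x2_example() -> __m512i {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
    // MASK = 0b11_10_01_00 selects lane 0 of `a` (0, 1), lane 1 of `a` (2, 3),
    // lane 2 of `b` (12, 13) and lane 3 of `b` (14, 15).
    _mm512_shuffle_i64x2::<0b11_10_01_00>(a, b)
}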

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // should be vshufi64x2
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let r: i64x4 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 2 + 0,
                (MASK as u32 & 0b1) * 2 + 1,
                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] // should be vshuff32x4, but vshuff64x2 is generated
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r: f32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
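
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): the f32 form reorders whole 128-bit lanes exactly like
// _mm512_shuffle_i32x4; only the element type differs.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn shuffle_f32x4_example() -> __m512 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let b = _mm512_setr_ps(
        16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31.,
    );
    // MASK = 0b00_00_11_11: both result lanes taken from `a` are its lane 3
    // (12.0..=15.0) and both taken from `b` are its lane 0 (16.0..=19.0).
    _mm512_shuffle_f32x4::<0b00_00_11_11>(a, b)
}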

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // should be vshuff32x4
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let r: f32x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 4 + 0,
                (MASK as u32 & 0b1) * 4 + 1,
                (MASK as u32 & 0b1) * 4 + 2,
                (MASK as u32 & 0b1) * 4 + 3,
                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
    b: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r: f64x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 2 + 0,
                (MASK as u32 & 0b11) * 2 + 1,
                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
            ],
        );
        transmute(r)
    }
}
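
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): swapping the two 256-bit halves of a vector is one common use of
// the 128-bit lane shuffle.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn shuffle_f64x2_example() -> __m512d {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    // Shuffling `a` with itself and MASK = 0b01_00_11_10 swaps the halves:
    // the result is 4.0, 5.0, 6.0, 7.0, 0.0, 1.0, 2.0, 3.0.
    _mm512_shuffle_f64x2::<0b01_00_11_10>(a, a)
}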

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // should be vshuff64x2
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let r: f64x4 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 2 + 0,
                (MASK as u32 & 0b1) * 2 + 1,
                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        match IMM8 & 0x3 {
            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
        }
    }
}
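
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): IMM8 selects which of the four 128-bit lanes is returned as an
// ordinary __m128.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn extractf32x4_example() -> __m128 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // IMM8 = 2 extracts the third lane, i.e. the values 8.0, 9.0, 10.0, 11.0.
    _mm512_extractf32x4_ps::<2>(a)
}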

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextract, IMM8 = 1) // should be vextractf32x4
)]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
        }
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}

/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf64x4, IMM1 = 1) // should be vextracti64x4
)]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        match IMM1 {
            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
        }
    }
}
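
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): IMM1 is a single bit choosing the lower (0) or upper (1) 256-bit
// half of the 512-bit source.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn extracti64x4_example() -> __m256i {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    // IMM1 = 1 extracts the upper half, i.e. the values 4, 5, 6, 7.
    _mm512_extracti64x4_epi64::<1>(a)
}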

/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}

/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}

/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
        }
    }
}

/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m512d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}

/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf32x4, IMM2 = 3) // should be vextracti32x4
)]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let a = a.as_i32x16();
        let zero = i32x16::ZERO;
        let extract: i32x4 = match IMM2 {
            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
            1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
            2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
            _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
        };
        transmute(extract)
    }
}
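
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): like the floating-point variant, IMM2 picks one of the four
// 128-bit lanes, returned here as a __m128i.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn extracti32x4_example() -> __m128i {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // IMM2 = 3 extracts the highest lane, i.e. the values 12, 13, 14, 15.
    _mm512_extracti32x4_epi32::<3>(a)
}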

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m512i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextract, IMM1 = 1) // should be vextracti32x4
)]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let a = a.as_i32x8();
        let zero = i32x8::ZERO;
        let extract: i32x4 = match IMM1 {
            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
            _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
        };
        transmute(extract)
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
    unsafe {
        let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(r)
    }
}
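
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): every even-indexed element is copied into the odd slot above it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn moveldup_example() -> __m512 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // Result: 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, ..., 14.0, 14.0.
    _mm512_moveldup_ps(a)
}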

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 =
            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 =
            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
    unsafe {
        let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(r)
    }
}
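
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): the write-masked variant defined below merges the duplicated
// odd-indexed elements with `src` wherever the corresponding mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_movehdup_example() -> __m512 {
    let src = _mm512_set1_ps(-1.);
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // Only the low four bits of the mask are set, so the result starts with
    // 1.0, 1.0, 3.0, 3.0 and the remaining twelve elements are -1.0.
    _mm512_mask_movehdup_ps(src, 0b0000_0000_0000_1111, a)
}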

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 =
            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 =
            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
    unsafe {
        let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(r)
    }
}
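
// Illustrative sketch (hypothetical helper, not part of the stdarch test
// suite): the zero-masked variant defined below zeroes every element whose
// mask bit is clear instead of preserving a source operand.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn maskz_movedup_example() -> __m512d {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    // Even-indexed elements are duplicated (0.0, 0.0, 2.0, 2.0, 4.0, 4.0,
    // 6.0, 6.0), then the upper four results are zeroed because their mask
    // bits are clear: 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0.
    _mm512_maskz_movedup_pd(0b0000_1111, a)
}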
25421
25422/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25423///
25424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25425#[inline]
25426#[target_feature(enable = "avx512f")]
25427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25428#[cfg_attr(test, assert_instr(vmovddup))]
25429pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25430    unsafe {
25431        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25432        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25433    }
25434}
25435
25436/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25437///
25438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25439#[inline]
25440#[target_feature(enable = "avx512f")]
25441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25442#[cfg_attr(test, assert_instr(vmovddup))]
25443pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25444    unsafe {
25445        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25446        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25447    }
25448}
25449
25450/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25451///
25452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
25453#[inline]
25454#[target_feature(enable = "avx512f,avx512vl")]
25455#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25456#[cfg_attr(test, assert_instr(vmovddup))]
25457pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
25458    unsafe {
25459        let mov = _mm256_movedup_pd(a);
25460        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
25461    }
25462}
25463
25464/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25465///
25466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
25467#[inline]
25468#[target_feature(enable = "avx512f,avx512vl")]
25469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25470#[cfg_attr(test, assert_instr(vmovddup))]
25471pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
25472    unsafe {
25473        let mov = _mm256_movedup_pd(a);
25474        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
25475    }
25476}
25477
25478/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25479///
25480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
25481#[inline]
25482#[target_feature(enable = "avx512f,avx512vl")]
25483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25484#[cfg_attr(test, assert_instr(vmovddup))]
25485pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
25486    unsafe {
25487        let mov = _mm_movedup_pd(a);
25488        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
25489    }
25490}
25491
25492/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25493///
25494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
25495#[inline]
25496#[target_feature(enable = "avx512f,avx512vl")]
25497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25498#[cfg_attr(test, assert_instr(vmovddup))]
25499pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
25500    unsafe {
25501        let mov = _mm_movedup_pd(a);
25502        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
25503    }
25504}
25505
25506/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25507///
25508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
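///
/// A short illustrative sketch (not part of the upstream documentation),
/// assuming `avx512f` is available at runtime on an `x86_64` target; other
/// `core::arch` intrinsics are used only to build the inputs and read back
/// the result:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_set1_epi32(0);
///         let b = _mm_setr_epi32(1, 2, 3, 4);
///         // IMM8 = 2 replaces the third 128-bit chunk (elements 8..=11).
///         _mm512_inserti32x4::<2>(a, b)
///     };
///     let mut out = [0i32; 16];
///     unsafe { _mm512_storeu_si512(out.as_mut_ptr().cast(), r) };
///     assert_eq!(out, [0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0]);
/// }
/// # }
/// ```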
25509#[inline]
25510#[target_feature(enable = "avx512f")]
25511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25512#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
25513#[rustc_legacy_const_generics(2)]
25514pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
25515    unsafe {
25516        static_assert_uimm_bits!(IMM8, 2);
25517        let a = a.as_i32x16();
25518        let b = _mm512_castsi128_si512(b).as_i32x16();
25519        let ret: i32x16 = match IMM8 & 0b11 {
25520            0 => {
25521                simd_shuffle!(
25522                    a,
25523                    b,
25524                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25525                )
25526            }
25527            1 => {
25528                simd_shuffle!(
25529                    a,
25530                    b,
25531                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25532                )
25533            }
25534            2 => {
25535                simd_shuffle!(
25536                    a,
25537                    b,
25538                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25539                )
25540            }
25541            _ => {
25542                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25543            }
25544        };
25545        transmute(ret)
25546    }
25547}
25548
25549/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25550///
25551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
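///
/// A brief sketch of the writemask behaviour (illustrative, not from the
/// upstream docs); it assumes `avx512f` is available at runtime:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let src = _mm512_set1_epi32(-1);
///         let a = _mm512_set1_epi32(0);
///         let b = _mm_setr_epi32(1, 2, 3, 4);
///         // Elements 8..=11 come from the insert result; all other elements
///         // are copied from `src` because their mask bits are clear.
///         _mm512_mask_inserti32x4::<2>(src, 0b0000_1111_0000_0000, a, b)
///     };
///     let mut out = [0i32; 16];
///     unsafe { _mm512_storeu_si512(out.as_mut_ptr().cast(), r) };
///     assert_eq!(out, [-1, -1, -1, -1, -1, -1, -1, -1, 1, 2, 3, 4, -1, -1, -1, -1]);
/// }
/// # }
/// ```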
25552#[inline]
25553#[target_feature(enable = "avx512f")]
25554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25555#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25556#[rustc_legacy_const_generics(4)]
25557pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
25558    src: __m512i,
25559    k: __mmask16,
25560    a: __m512i,
25561    b: __m128i,
25562) -> __m512i {
25563    unsafe {
25564        static_assert_uimm_bits!(IMM8, 2);
25565        let r = _mm512_inserti32x4::<IMM8>(a, b);
25566        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25567    }
25568}
25569
25570/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25571///
25572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
25573#[inline]
25574#[target_feature(enable = "avx512f")]
25575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25576#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25577#[rustc_legacy_const_generics(3)]
25578pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
25579    unsafe {
25580        static_assert_uimm_bits!(IMM8, 2);
25581        let r = _mm512_inserti32x4::<IMM8>(a, b);
25582        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25583    }
25584}
25585
25586/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25587///
25588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
25589#[inline]
25590#[target_feature(enable = "avx512f,avx512vl")]
25591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25592#[cfg_attr(
25593    test,
25594    assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
25595)]
25596#[rustc_legacy_const_generics(2)]
25597pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
25598    unsafe {
25599        static_assert_uimm_bits!(IMM8, 1);
25600        let a = a.as_i32x8();
25601        let b = _mm256_castsi128_si256(b).as_i32x8();
25602        let ret: i32x8 = match IMM8 & 0b1 {
25603            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25604            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25605        };
25606        transmute(ret)
25607    }
25608}
25609
25610/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25611///
25612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
25613#[inline]
25614#[target_feature(enable = "avx512f,avx512vl")]
25615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25616#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25617#[rustc_legacy_const_generics(4)]
25618pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
25619    src: __m256i,
25620    k: __mmask8,
25621    a: __m256i,
25622    b: __m128i,
25623) -> __m256i {
25624    unsafe {
25625        static_assert_uimm_bits!(IMM8, 1);
25626        let r = _mm256_inserti32x4::<IMM8>(a, b);
25627        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25628    }
25629}
25630
25631/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25632///
25633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
25634#[inline]
25635#[target_feature(enable = "avx512f,avx512vl")]
25636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25637#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25638#[rustc_legacy_const_generics(3)]
25639pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
25640    unsafe {
25641        static_assert_uimm_bits!(IMM8, 1);
25642        let r = _mm256_inserti32x4::<IMM8>(a, b);
25643        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25644    }
25645}
25646
25647/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
25648///
25649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
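///
/// A minimal sketch (illustrative, not part of the upstream documentation),
/// assuming runtime `avx512f` support on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_set1_epi64(0);
///         let b = _mm256_setr_epi64x(1, 2, 3, 4);
///         // IMM8 = 1 replaces the upper 256 bits (elements 4..=7).
///         _mm512_inserti64x4::<1>(a, b)
///     };
///     let mut out = [0i64; 8];
///     unsafe { _mm512_storeu_si512(out.as_mut_ptr().cast(), r) };
///     assert_eq!(out, [0i64, 0, 0, 0, 1, 2, 3, 4]);
/// }
/// # }
/// ```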
25650#[inline]
25651#[target_feature(enable = "avx512f")]
25652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25653#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
25654#[rustc_legacy_const_generics(2)]
25655pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
25656    unsafe {
25657        static_assert_uimm_bits!(IMM8, 1);
25658        let b = _mm512_castsi256_si512(b);
25659        match IMM8 & 0b1 {
25660            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25661            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25662        }
25663    }
25664}
25665
25666/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25667///
25668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
25669#[inline]
25670#[target_feature(enable = "avx512f")]
25671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25672#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25673#[rustc_legacy_const_generics(4)]
25674pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
25675    src: __m512i,
25676    k: __mmask8,
25677    a: __m512i,
25678    b: __m256i,
25679) -> __m512i {
25680    unsafe {
25681        static_assert_uimm_bits!(IMM8, 1);
25682        let r = _mm512_inserti64x4::<IMM8>(a, b);
25683        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25684    }
25685}
25686
25687/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25688///
25689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
25690#[inline]
25691#[target_feature(enable = "avx512f")]
25692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25693#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25694#[rustc_legacy_const_generics(3)]
25695pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
25696    unsafe {
25697        static_assert_uimm_bits!(IMM8, 1);
25698        let r = _mm512_inserti64x4::<IMM8>(a, b);
25699        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25700    }
25701}
25702
25703/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25704///
25705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
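///
/// An illustrative sketch (not from the upstream docs), assuming runtime
/// `avx512f` support on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_set1_ps(0.0);
///         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
///         // IMM8 = 3 replaces the topmost 128-bit chunk (elements 12..=15).
///         _mm512_insertf32x4::<3>(a, b)
///     };
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[12..], [1.0f32, 2.0, 3.0, 4.0]);
///     assert_eq!(out[..12], [0.0f32; 12]);
/// }
/// # }
/// ```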
25706#[inline]
25707#[target_feature(enable = "avx512f")]
25708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25709#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25710#[rustc_legacy_const_generics(2)]
25711pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
25712    unsafe {
25713        static_assert_uimm_bits!(IMM8, 2);
25714        let b = _mm512_castps128_ps512(b);
25715        match IMM8 & 0b11 {
25716            0 => {
25717                simd_shuffle!(
25718                    a,
25719                    b,
25720                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25721                )
25722            }
25723            1 => {
25724                simd_shuffle!(
25725                    a,
25726                    b,
25727                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25728                )
25729            }
25730            2 => {
25731                simd_shuffle!(
25732                    a,
25733                    b,
25734                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25735                )
25736            }
25737            _ => {
25738                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25739            }
25740        }
25741    }
25742}
25743
25744/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25745///
25746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
25747#[inline]
25748#[target_feature(enable = "avx512f")]
25749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25750#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25751#[rustc_legacy_const_generics(4)]
25752pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
25753    src: __m512,
25754    k: __mmask16,
25755    a: __m512,
25756    b: __m128,
25757) -> __m512 {
25758    unsafe {
25759        static_assert_uimm_bits!(IMM8, 2);
25760        let r = _mm512_insertf32x4::<IMM8>(a, b);
25761        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25762    }
25763}
25764
25765/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25766///
25767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
25768#[inline]
25769#[target_feature(enable = "avx512f")]
25770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25771#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25772#[rustc_legacy_const_generics(3)]
25773pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
25774    unsafe {
25775        static_assert_uimm_bits!(IMM8, 2);
25776        let r = _mm512_insertf32x4::<IMM8>(a, b);
25777        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25778    }
25779}
25780
25781/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25782///
25783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
25784#[inline]
25785#[target_feature(enable = "avx512f,avx512vl")]
25786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25787#[cfg_attr(
25788    test,
25789    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
25790)]
25791#[rustc_legacy_const_generics(2)]
25792pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
25793    unsafe {
25794        static_assert_uimm_bits!(IMM8, 1);
25795        let b = _mm256_castps128_ps256(b);
25796        match IMM8 & 0b1 {
25797            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25798            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25799        }
25800    }
25801}
25802
25803/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25804///
25805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
25806#[inline]
25807#[target_feature(enable = "avx512f,avx512vl")]
25808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25809#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25810#[rustc_legacy_const_generics(4)]
25811pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
25812    src: __m256,
25813    k: __mmask8,
25814    a: __m256,
25815    b: __m128,
25816) -> __m256 {
25817    unsafe {
25818        static_assert_uimm_bits!(IMM8, 1);
25819        let r = _mm256_insertf32x4::<IMM8>(a, b);
25820        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25821    }
25822}
25823
25824/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25825///
25826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
25827#[inline]
25828#[target_feature(enable = "avx512f,avx512vl")]
25829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25830#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25831#[rustc_legacy_const_generics(3)]
25832pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
25833    unsafe {
25834        static_assert_uimm_bits!(IMM8, 1);
25835        let r = _mm256_insertf32x4::<IMM8>(a, b);
25836        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25837    }
25838}
25839
25840/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
25841///
25842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
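///
/// A minimal usage sketch (illustrative, not part of the upstream docs),
/// assuming `avx512f` is available at runtime on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_set1_pd(0.0);
///         let b = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///         // IMM8 = 0 replaces the lower 256 bits (elements 0..=3).
///         _mm512_insertf64x4::<0>(a, b)
///     };
///     let mut out = [0.0f64; 8];
///     unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
///     assert_eq!(out, [1.0f64, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0]);
/// }
/// # }
/// ```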
25843#[inline]
25844#[target_feature(enable = "avx512f")]
25845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25846#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25847#[rustc_legacy_const_generics(2)]
25848pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
25849    unsafe {
25850        static_assert_uimm_bits!(IMM8, 1);
25851        let b = _mm512_castpd256_pd512(b);
25852        match IMM8 & 0b1 {
25853            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25854            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25855        }
25856    }
25857}
25858
25859/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25860///
25861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
25862#[inline]
25863#[target_feature(enable = "avx512f")]
25864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25865#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25866#[rustc_legacy_const_generics(4)]
25867pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
25868    src: __m512d,
25869    k: __mmask8,
25870    a: __m512d,
25871    b: __m256d,
25872) -> __m512d {
25873    unsafe {
25874        static_assert_uimm_bits!(IMM8, 1);
25875        let r = _mm512_insertf64x4::<IMM8>(a, b);
25876        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25877    }
25878}
25879
25880/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25881///
25882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
25883#[inline]
25884#[target_feature(enable = "avx512f")]
25885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25886#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25887#[rustc_legacy_const_generics(3)]
25888pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
25889    unsafe {
25890        static_assert_uimm_bits!(IMM8, 1);
25891        let r = _mm512_insertf64x4::<IMM8>(a, b);
25892        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25893    }
25894}
25895
25896/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25897///
25898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
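///
/// An illustrative sketch of the per-lane interleaving (not part of the
/// upstream documentation), assuming runtime `avx512f` support on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///         let b = _mm512_set1_epi32(-1);
///         // The upper two elements of each 128-bit lane of `a` are interleaved with `b`.
///         _mm512_unpackhi_epi32(a, b)
///     };
///     let mut out = [0i32; 16];
///     unsafe { _mm512_storeu_si512(out.as_mut_ptr().cast(), r) };
///     assert_eq!(out, [2, -1, 3, -1, 6, -1, 7, -1, 10, -1, 11, -1, 14, -1, 15, -1]);
/// }
/// # }
/// ```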
25899#[inline]
25900#[target_feature(enable = "avx512f")]
25901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25902#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
25903pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
25904    unsafe {
25905        let a = a.as_i32x16();
25906        let b = b.as_i32x16();
25907        #[rustfmt::skip]
25908        let r: i32x16 = simd_shuffle!(
25909            a, b,
25910            [ 2, 18, 3, 19,
25911              2 + 4, 18 + 4, 3 + 4, 19 + 4,
25912              2 + 8, 18 + 8, 3 + 8, 19 + 8,
25913              2 + 12, 18 + 12, 3 + 12, 19 + 12],
25914        );
25915        transmute(r)
25916    }
25917}
25918
25919/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25920///
25921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
25922#[inline]
25923#[target_feature(enable = "avx512f")]
25924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25925#[cfg_attr(test, assert_instr(vpunpckhdq))]
25926pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25927    unsafe {
25928        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25929        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
25930    }
25931}
25932
25933/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25934///
25935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
25936#[inline]
25937#[target_feature(enable = "avx512f")]
25938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25939#[cfg_attr(test, assert_instr(vpunpckhdq))]
25940pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25941    unsafe {
25942        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25943        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
25944    }
25945}
25946
25947/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25948///
25949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
25950#[inline]
25951#[target_feature(enable = "avx512f,avx512vl")]
25952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25953#[cfg_attr(test, assert_instr(vpunpckhdq))]
25954pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25955    unsafe {
25956        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25957        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
25958    }
25959}
25960
25961/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25962///
25963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
25964#[inline]
25965#[target_feature(enable = "avx512f,avx512vl")]
25966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25967#[cfg_attr(test, assert_instr(vpunpckhdq))]
25968pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25969    unsafe {
25970        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25971        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
25972    }
25973}
25974
25975/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25976///
25977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
25978#[inline]
25979#[target_feature(enable = "avx512f,avx512vl")]
25980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25981#[cfg_attr(test, assert_instr(vpunpckhdq))]
25982pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25983    unsafe {
25984        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25985        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
25986    }
25987}
25988
25989/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25990///
25991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
25992#[inline]
25993#[target_feature(enable = "avx512f,avx512vl")]
25994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25995#[cfg_attr(test, assert_instr(vpunpckhdq))]
25996pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25997    unsafe {
25998        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25999        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
26000    }
26001}
26002
26003/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
26004///
26005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
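///
/// A minimal sketch (illustrative, not from the upstream docs), assuming
/// runtime `avx512f` support on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///         let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
///         // The upper element of each 128-bit lane of `a` and `b` is interleaved.
///         _mm512_unpackhi_epi64(a, b)
///     };
///     let mut out = [0i64; 8];
///     unsafe { _mm512_storeu_si512(out.as_mut_ptr().cast(), r) };
///     assert_eq!(out, [1i64, 11, 3, 13, 5, 15, 7, 17]);
/// }
/// # }
/// ```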
26006#[inline]
26007#[target_feature(enable = "avx512f")]
26008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26009#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
26010pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
26011    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26012}
26013
26014/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26015///
26016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
26017#[inline]
26018#[target_feature(enable = "avx512f")]
26019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26020#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26021pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26022    unsafe {
26023        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
26024        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
26025    }
26026}
26027
26028/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26029///
26030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26031#[inline]
26032#[target_feature(enable = "avx512f")]
26033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26034#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26035pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26036    unsafe {
26037        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
26038        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
26039    }
26040}
26041
26042/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26043///
26044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
26045#[inline]
26046#[target_feature(enable = "avx512f,avx512vl")]
26047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26048#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26049pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26050    unsafe {
26051        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
26052        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
26053    }
26054}
26055
26056/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26057///
26058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
26059#[inline]
26060#[target_feature(enable = "avx512f,avx512vl")]
26061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26062#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26063pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26064    unsafe {
26065        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
26066        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
26067    }
26068}
26069
26070/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26071///
26072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
26073#[inline]
26074#[target_feature(enable = "avx512f,avx512vl")]
26075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26076#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26077pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26078    unsafe {
26079        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26080        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
26081    }
26082}
26083
26084/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26085///
26086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
26087#[inline]
26088#[target_feature(enable = "avx512f,avx512vl")]
26089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26090#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26091pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26092    unsafe {
26093        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26094        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
26095    }
26096}
26097
26098/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
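///
/// An illustrative sketch (not part of the upstream documentation), assuming
/// runtime `avx512f` support on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_setr_ps(
///             0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///             8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///         );
///         let b = _mm512_set1_ps(-1.0);
///         _mm512_unpackhi_ps(a, b)
///     };
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Each 128-bit lane holds [a_hi0, b_hi0, a_hi1, b_hi1]; check the first lane.
///     assert_eq!(out[..4], [2.0f32, -1.0, 3.0, -1.0]);
/// }
/// # }
/// ```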
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26104#[cfg_attr(test, assert_instr(vunpckhps))]
26105pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
26106    unsafe {
26107        #[rustfmt::skip]
26108        simd_shuffle!(
26109            a, b,
26110            [ 2, 18, 3, 19,
26111              2 + 4, 18 + 4, 3 + 4, 19 + 4,
26112              2 + 8, 18 + 8, 3 + 8, 19 + 8,
26113              2 + 12, 18 + 12, 3 + 12, 19 + 12],
26114        )
26115    }
26116}
26117
26118/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26119///
26120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
26121#[inline]
26122#[target_feature(enable = "avx512f")]
26123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26124#[cfg_attr(test, assert_instr(vunpckhps))]
26125pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26126    unsafe {
26127        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26128        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
26129    }
26130}
26131
26132/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26133///
26134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
26135#[inline]
26136#[target_feature(enable = "avx512f")]
26137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26138#[cfg_attr(test, assert_instr(vunpckhps))]
26139pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26140    unsafe {
26141        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26142        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
26143    }
26144}
26145
26146/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26147///
26148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
26149#[inline]
26150#[target_feature(enable = "avx512f,avx512vl")]
26151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26152#[cfg_attr(test, assert_instr(vunpckhps))]
26153pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26154    unsafe {
26155        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26156        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
26157    }
26158}
26159
26160/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26161///
26162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
26163#[inline]
26164#[target_feature(enable = "avx512f,avx512vl")]
26165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26166#[cfg_attr(test, assert_instr(vunpckhps))]
26167pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26168    unsafe {
26169        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26170        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
26171    }
26172}
26173
26174/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26175///
26176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
26177#[inline]
26178#[target_feature(enable = "avx512f,avx512vl")]
26179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26180#[cfg_attr(test, assert_instr(vunpckhps))]
26181pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26182    unsafe {
26183        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26184        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
26185    }
26186}
26187
26188/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26189///
26190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
26191#[inline]
26192#[target_feature(enable = "avx512f,avx512vl")]
26193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26194#[cfg_attr(test, assert_instr(vunpckhps))]
26195pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26196    unsafe {
26197        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26198        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
26199    }
26200}
26201
26202/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26203///
26204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
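///
/// A minimal usage sketch (illustrative, not part of the upstream docs),
/// assuming `avx512f` is available at runtime on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///         let b = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
///         // The upper element of each 128-bit lane of `a` and `b` is interleaved.
///         _mm512_unpackhi_pd(a, b)
///     };
///     let mut out = [0.0f64; 8];
///     unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
///     assert_eq!(out, [1.0f64, 11.0, 3.0, 13.0, 5.0, 15.0, 7.0, 17.0]);
/// }
/// # }
/// ```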
26205#[inline]
26206#[target_feature(enable = "avx512f")]
26207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26208#[cfg_attr(test, assert_instr(vunpckhpd))]
26209pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
26210    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26211}
26212
26213/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26214///
26215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
26216#[inline]
26217#[target_feature(enable = "avx512f")]
26218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26219#[cfg_attr(test, assert_instr(vunpckhpd))]
26220pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26221    unsafe {
26222        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26223        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
26224    }
26225}
26226
26227/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26228///
26229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
26230#[inline]
26231#[target_feature(enable = "avx512f")]
26232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26233#[cfg_attr(test, assert_instr(vunpckhpd))]
26234pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26235    unsafe {
26236        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26237        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
26238    }
26239}
26240
26241/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26242///
26243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
26244#[inline]
26245#[target_feature(enable = "avx512f,avx512vl")]
26246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26247#[cfg_attr(test, assert_instr(vunpckhpd))]
26248pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26249    unsafe {
26250        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26251        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
26252    }
26253}
26254
26255/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26256///
26257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
26258#[inline]
26259#[target_feature(enable = "avx512f,avx512vl")]
26260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26261#[cfg_attr(test, assert_instr(vunpckhpd))]
26262pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26263    unsafe {
26264        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26265        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
26266    }
26267}
26268
26269/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26270///
26271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
26272#[inline]
26273#[target_feature(enable = "avx512f,avx512vl")]
26274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26275#[cfg_attr(test, assert_instr(vunpckhpd))]
26276pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26277    unsafe {
26278        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26279        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
26280    }
26281}
26282
26283/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26284///
26285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
26286#[inline]
26287#[target_feature(enable = "avx512f,avx512vl")]
26288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26289#[cfg_attr(test, assert_instr(vunpckhpd))]
26290pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26291    unsafe {
26292        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26293        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
26294    }
26295}
26296
26297/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26298///
26299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
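///
/// An illustrative sketch (not from the upstream docs), assuming runtime
/// `avx512f` support on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///         let b = _mm512_set1_epi32(-1);
///         // The lower two elements of each 128-bit lane of `a` are interleaved with `b`.
///         _mm512_unpacklo_epi32(a, b)
///     };
///     let mut out = [0i32; 16];
///     unsafe { _mm512_storeu_si512(out.as_mut_ptr().cast(), r) };
///     assert_eq!(out, [0, -1, 1, -1, 4, -1, 5, -1, 8, -1, 9, -1, 12, -1, 13, -1]);
/// }
/// # }
/// ```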
26300#[inline]
26301#[target_feature(enable = "avx512f")]
26302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26303#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
26304pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
26305    unsafe {
26306        let a = a.as_i32x16();
26307        let b = b.as_i32x16();
26308        #[rustfmt::skip]
26309        let r: i32x16 = simd_shuffle!(
26310            a, b,
26311            [ 0, 16, 1, 17,
26312              0 + 4, 16 + 4, 1 + 4, 17 + 4,
26313              0 + 8, 16 + 8, 1 + 8, 17 + 8,
26314              0 + 12, 16 + 12, 1 + 12, 17 + 12],
26315        );
26316        transmute(r)
26317    }
26318}
26319
26320/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26321///
26322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
26323#[inline]
26324#[target_feature(enable = "avx512f")]
26325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26326#[cfg_attr(test, assert_instr(vpunpckldq))]
26327pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26328    unsafe {
26329        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26330        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
26331    }
26332}
26333
26334/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26335///
26336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
26337#[inline]
26338#[target_feature(enable = "avx512f")]
26339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26340#[cfg_attr(test, assert_instr(vpunpckldq))]
26341pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26342    unsafe {
26343        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26344        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
26345    }
26346}
26347
26348/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26349///
26350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
26351#[inline]
26352#[target_feature(enable = "avx512f,avx512vl")]
26353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26354#[cfg_attr(test, assert_instr(vpunpckldq))]
26355pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26356    unsafe {
26357        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26358        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
26359    }
26360}
26361
26362/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26363///
26364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
26365#[inline]
26366#[target_feature(enable = "avx512f,avx512vl")]
26367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26368#[cfg_attr(test, assert_instr(vpunpckldq))]
26369pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26370    unsafe {
26371        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26372        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
26373    }
26374}
26375
26376/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26377///
26378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
26379#[inline]
26380#[target_feature(enable = "avx512f,avx512vl")]
26381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26382#[cfg_attr(test, assert_instr(vpunpckldq))]
26383pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26384    unsafe {
26385        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26386        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
26387    }
26388}
26389
26390/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26391///
26392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
26393#[inline]
26394#[target_feature(enable = "avx512f,avx512vl")]
26395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26396#[cfg_attr(test, assert_instr(vpunpckldq))]
26397pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26398    unsafe {
26399        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26400        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
26401    }
26402}
26403
26404/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26405///
26406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
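///
/// A minimal sketch (illustrative, not part of the upstream documentation),
/// assuming runtime `avx512f` support on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///         let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
///         // The lower element of each 128-bit lane of `a` and `b` is interleaved.
///         _mm512_unpacklo_epi64(a, b)
///     };
///     let mut out = [0i64; 8];
///     unsafe { _mm512_storeu_si512(out.as_mut_ptr().cast(), r) };
///     assert_eq!(out, [0i64, 10, 2, 12, 4, 14, 6, 16]);
/// }
/// # }
/// ```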
26407#[inline]
26408#[target_feature(enable = "avx512f")]
26409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26410#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
26411pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
26412    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26413}
26414
26415/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26416///
26417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
26418#[inline]
26419#[target_feature(enable = "avx512f")]
26420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26421#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26422pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26423    unsafe {
26424        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26425        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
26426    }
26427}
26428
26429/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26430///
26431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
26432#[inline]
26433#[target_feature(enable = "avx512f")]
26434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26435#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26436pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26437    unsafe {
26438        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26439        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
26440    }
26441}
26442
26443/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26444///
26445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
26446#[inline]
26447#[target_feature(enable = "avx512f,avx512vl")]
26448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26449#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26450pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26451    unsafe {
26452        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26453        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
26454    }
26455}
26456
26457/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26458///
26459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
26460#[inline]
26461#[target_feature(enable = "avx512f,avx512vl")]
26462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26463#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26464pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26465    unsafe {
26466        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26467        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
26468    }
26469}
26470
26471/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26472///
26473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
26474#[inline]
26475#[target_feature(enable = "avx512f,avx512vl")]
26476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26477#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26478pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26479    unsafe {
26480        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26481        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
26482    }
26483}
26484
26485/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26486///
26487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
26488#[inline]
26489#[target_feature(enable = "avx512f,avx512vl")]
26490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26491#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26492pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26493    unsafe {
26494        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26495        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
26496    }
26497}
26498
26499/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26500///
26501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
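///
/// An illustrative sketch (not part of the upstream docs), assuming runtime
/// `avx512f` support on `x86_64`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the runtime check above guarantees AVX-512F support.
///     let r = unsafe {
///         let a = _mm512_setr_ps(
///             0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///             8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///         );
///         let b = _mm512_set1_ps(-1.0);
///         _mm512_unpacklo_ps(a, b)
///     };
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Each 128-bit lane holds [a_lo0, b_lo0, a_lo1, b_lo1]; check the first lane.
///     assert_eq!(out[..4], [0.0f32, -1.0, 1.0, -1.0]);
/// }
/// # }
/// ```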
26502#[inline]
26503#[target_feature(enable = "avx512f")]
26504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26505#[cfg_attr(test, assert_instr(vunpcklps))]
26506pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26507    unsafe {
26508        #[rustfmt::skip]
26509        simd_shuffle!(a, b,
26510                       [ 0, 16, 1, 17,
26511                         0 + 4, 16 + 4, 1 + 4, 17 + 4,
26512                         0 + 8, 16 + 8, 1 + 8, 17 + 8,
26513                         0 + 12, 16 + 12, 1 + 12, 17 + 12],
26514        )
26515    }
26516}
26517
26518/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26519///
26520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
26521#[inline]
26522#[target_feature(enable = "avx512f")]
26523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26524#[cfg_attr(test, assert_instr(vunpcklps))]
26525pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26526    unsafe {
26527        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26528        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
26529    }
26530}
26531
26532/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26533///
26534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
26535#[inline]
26536#[target_feature(enable = "avx512f")]
26537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26538#[cfg_attr(test, assert_instr(vunpcklps))]
26539pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26540    unsafe {
26541        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26542        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
26543    }
26544}
26545
26546/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26547///
26548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
26549#[inline]
26550#[target_feature(enable = "avx512f,avx512vl")]
26551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26552#[cfg_attr(test, assert_instr(vunpcklps))]
26553pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26554    unsafe {
26555        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26556        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
26557    }
26558}
26559
26560/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26561///
26562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
26563#[inline]
26564#[target_feature(enable = "avx512f,avx512vl")]
26565#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26566#[cfg_attr(test, assert_instr(vunpcklps))]
26567pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26568    unsafe {
26569        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26570        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
26571    }
26572}
26573
26574/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26575///
26576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
26577#[inline]
26578#[target_feature(enable = "avx512f,avx512vl")]
26579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26580#[cfg_attr(test, assert_instr(vunpcklps))]
26581pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26582    unsafe {
26583        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26584        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
26585    }
26586}
26587
26588/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26589///
26590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
26591#[inline]
26592#[target_feature(enable = "avx512f,avx512vl")]
26593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26594#[cfg_attr(test, assert_instr(vunpcklps))]
26595pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26596    unsafe {
26597        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26598        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
26599    }
26600}
26601
26602/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26603///
26604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
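///
/// An illustrative sketch (not from the original source); it assumes the
/// `avx512f` target feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// let b = _mm512_set1_pd(9.);
/// let r = _mm512_unpacklo_pd(a, b);
/// // The low element of each 128-bit lane of `a` is interleaved with the
/// // corresponding element of `b`:
/// // r = [0, 9, 2, 9, 4, 9, 6, 9]
/// ```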
26605#[inline]
26606#[target_feature(enable = "avx512f")]
26607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26608#[cfg_attr(test, assert_instr(vunpcklpd))]
26609pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
26610    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26611}
26612
26613/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26614///
26615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
26616#[inline]
26617#[target_feature(enable = "avx512f")]
26618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26619#[cfg_attr(test, assert_instr(vunpcklpd))]
26620pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26621    unsafe {
26622        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26623        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
26624    }
26625}
26626
26627/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26628///
26629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
26630#[inline]
26631#[target_feature(enable = "avx512f")]
26632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26633#[cfg_attr(test, assert_instr(vunpcklpd))]
26634pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26635    unsafe {
26636        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26637        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
26638    }
26639}
26640
26641/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26642///
26643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
26644#[inline]
26645#[target_feature(enable = "avx512f,avx512vl")]
26646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26647#[cfg_attr(test, assert_instr(vunpcklpd))]
26648pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26649    unsafe {
26650        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26651        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
26652    }
26653}
26654
26655/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26656///
26657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
26658#[inline]
26659#[target_feature(enable = "avx512f,avx512vl")]
26660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26661#[cfg_attr(test, assert_instr(vunpcklpd))]
26662pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26663    unsafe {
26664        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26665        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
26666    }
26667}
26668
26669/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26670///
26671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
26672#[inline]
26673#[target_feature(enable = "avx512f,avx512vl")]
26674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26675#[cfg_attr(test, assert_instr(vunpcklpd))]
26676pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26677    unsafe {
26678        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26679        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
26680    }
26681}
26682
26683/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26684///
26685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
26686#[inline]
26687#[target_feature(enable = "avx512f,avx512vl")]
26688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26689#[cfg_attr(test, assert_instr(vunpcklpd))]
26690pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26691    unsafe {
26692        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26693        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
26694    }
26695}
26696
26697/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26698///
26699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
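///
/// A sketch (illustrative only, not from the original source) showing the cast
/// round-tripping through `_mm512_castps512_ps128`; it assumes the `avx512f`
/// target feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_ps(1., 2., 3., 4.);
/// let wide = _mm512_castps128_ps512(a);
/// // The low 128 bits hold `a`; the contents of the upper 384 bits must not
/// // be relied upon.
/// let low = _mm512_castps512_ps128(wide);
/// // low = [1, 2, 3, 4]
/// ```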
26700#[inline]
26701#[target_feature(enable = "avx512f")]
26702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26703pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
26704    unsafe {
26705        simd_shuffle!(
26706            a,
26707            _mm_undefined_ps(),
26708            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26709        )
26710    }
26711}
26712
26713/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26714///
26715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
26716#[inline]
26717#[target_feature(enable = "avx512f")]
26718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26719pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
26720    unsafe {
26721        simd_shuffle!(
26722            a,
26723            _mm256_undefined_ps(),
26724            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26725        )
26726    }
26727}
26728
26729/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26730///
26731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
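///
/// An illustrative sketch (not from the original source) contrasting this
/// intrinsic with `_mm512_castps128_ps512`; it assumes the `avx512f` target
/// feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_ps(1., 2., 3., 4.);
/// let r = _mm512_zextps128_ps512(a);
/// // Unlike the plain cast, the upper 384 bits are guaranteed to be zero:
/// // r = [1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
/// ```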
26732#[inline]
26733#[target_feature(enable = "avx512f")]
26734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26735pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
26736    unsafe {
26737        simd_shuffle!(
26738            a,
26739            _mm_set1_ps(0.),
26740            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26741        )
26742    }
26743}
26744
26745/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26746///
26747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
26748#[inline]
26749#[target_feature(enable = "avx512f")]
26750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26751pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
26752    unsafe {
26753        simd_shuffle!(
26754            a,
26755            _mm256_set1_ps(0.),
26756            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26757        )
26758    }
26759}
26760
26761/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26762///
26763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
26764#[inline]
26765#[target_feature(enable = "avx512f")]
26766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26767pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
26768    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26769}
26770
26771/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26772///
26773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
26774#[inline]
26775#[target_feature(enable = "avx512f")]
26776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26777pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
26778    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
26779}
26780
26781/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26782///
26783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
26784#[inline]
26785#[target_feature(enable = "avx512f")]
26786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26787pub fn _mm512_castps_pd(a: __m512) -> __m512d {
26788    unsafe { transmute(a) }
26789}
26790
26791/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26797pub fn _mm512_castps_si512(a: __m512) -> __m512i {
26798    unsafe { transmute(a) }
26799}
26800
26801/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26802///
26803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
26804#[inline]
26805#[target_feature(enable = "avx512f")]
26806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26807pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
26808    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26809}
26810
26811/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26812///
26813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
26814#[inline]
26815#[target_feature(enable = "avx512f")]
26816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26817pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
26818    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26819}
26820
26821/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26822///
26823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
26824#[inline]
26825#[target_feature(enable = "avx512f")]
26826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26827pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
26828    unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
26829}
26830
26831/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26837pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
26838    unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
26839}
26840
26841/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26842///
26843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
26844#[inline]
26845#[target_feature(enable = "avx512f")]
26846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26847pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
26848    unsafe { simd_shuffle!(a, a, [0, 1]) }
26849}
26850
26851/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
26854#[inline]
26855#[target_feature(enable = "avx512f")]
26856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26857pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
26858    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26859}
26860
26861/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26862///
26863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
26864#[inline]
26865#[target_feature(enable = "avx512f")]
26866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26867pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
26868    unsafe { transmute(a) }
26869}
26870
26871/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26872///
26873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
26874#[inline]
26875#[target_feature(enable = "avx512f")]
26876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26877pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
26878    unsafe { transmute(a) }
26879}
26880
26881/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26882///
26883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
26884#[inline]
26885#[target_feature(enable = "avx512f")]
26886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26887pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
26888    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26889}
26890
26891/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26892///
26893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
26894#[inline]
26895#[target_feature(enable = "avx512f")]
26896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26897pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
26898    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26899}
26900
26901/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26902///
26903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
26904#[inline]
26905#[target_feature(enable = "avx512f")]
26906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26907pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
26908    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26909}
26910
26911/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26912///
26913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
26914#[inline]
26915#[target_feature(enable = "avx512f")]
26916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26917pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
26918    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26919}
26920
26921/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26922///
26923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
26924#[inline]
26925#[target_feature(enable = "avx512f")]
26926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26927pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
26928    unsafe { simd_shuffle!(a, a, [0, 1]) }
26929}
26930
26931/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26932///
26933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
26934#[inline]
26935#[target_feature(enable = "avx512f")]
26936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26937pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
26938    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26939}
26940
26941/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26942///
26943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
26944#[inline]
26945#[target_feature(enable = "avx512f")]
26946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26947pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
26948    unsafe { transmute(a) }
26949}
26950
26951/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26952///
26953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
26954#[inline]
26955#[target_feature(enable = "avx512f")]
26956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26957pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
26958    unsafe { transmute(a) }
26959}
26960
26961/// Copy the lower 32-bit integer in a to dst.
26962///
26963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
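///
/// A minimal sketch (not from the original source); it assumes the `avx512f`
/// target feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_epi32(42, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// assert_eq!(_mm512_cvtsi512_si32(a), 42);
/// ```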
26964#[inline]
26965#[target_feature(enable = "avx512f")]
26966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26967#[cfg_attr(test, assert_instr(vmovd))]
26968pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
26969    unsafe { simd_extract!(a.as_i32x16(), 0) }
26970}
26971
26972/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
26973///
26974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
26975#[inline]
26976#[target_feature(enable = "avx512f")]
26977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26978pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
26979    unsafe { simd_extract!(a, 0) }
26980}
26981
26982/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
26983///
26984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
26985#[inline]
26986#[target_feature(enable = "avx512f")]
26987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26988pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
26989    unsafe { simd_extract!(a, 0) }
26990}
26991
26992/// Broadcast the low packed 32-bit integer from a to all elements of dst.
26993///
26994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
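///
/// A minimal sketch (not from the original source); it assumes the `avx512f`
/// target feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi32(7, 1, 2, 3); // the low element is 7
/// let r = _mm512_broadcastd_epi32(a);
/// // all 16 elements of r are 7
/// ```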
26995#[inline]
26996#[target_feature(enable = "avx512f")]
26997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26998#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
26999pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
27000    unsafe {
27001        let a = _mm512_castsi128_si512(a).as_i32x16();
27002        let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
27003        transmute(ret)
27004    }
27005}
27006
27007/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27008///
27009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
27010#[inline]
27011#[target_feature(enable = "avx512f")]
27012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27013#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27014pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27015    unsafe {
27016        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
27017        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27018    }
27019}
27020
27021/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27022///
27023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
27024#[inline]
27025#[target_feature(enable = "avx512f")]
27026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27027#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27028pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
27029    unsafe {
27030        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
27031        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27032    }
27033}
27034
27035/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27036///
27037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
27038#[inline]
27039#[target_feature(enable = "avx512f,avx512vl")]
27040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27041#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27042pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27043    unsafe {
27044        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
27045        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27046    }
27047}
27048
27049/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27050///
27051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
27052#[inline]
27053#[target_feature(enable = "avx512f,avx512vl")]
27054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27055#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27056pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
27057    unsafe {
27058        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
27059        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27060    }
27061}
27062
27063/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27064///
27065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
27066#[inline]
27067#[target_feature(enable = "avx512f,avx512vl")]
27068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27069#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27070pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27071    unsafe {
27072        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27073        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
27074    }
27075}
27076
27077/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27078///
27079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
27080#[inline]
27081#[target_feature(enable = "avx512f,avx512vl")]
27082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27083#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27084pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
27085    unsafe {
27086        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27087        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
27088    }
27089}
27090
27091/// Broadcast the low packed 64-bit integer from a to all elements of dst.
27092///
27093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
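///
/// A minimal sketch (not from the original source); it assumes the `avx512f`
/// target feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_epi64x(1, 99); // the low 64-bit element is 99
/// let r = _mm512_broadcastq_epi64(a);
/// // all 8 elements of r are 99
/// ```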
27094#[inline]
27095#[target_feature(enable = "avx512f")]
27096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27097#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
27098pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
27099    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27100}
27101
27102/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27103///
27104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
27105#[inline]
27106#[target_feature(enable = "avx512f")]
27107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27108#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27109pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
27110    unsafe {
27111        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27112        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27113    }
27114}
27115
27116/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27117///
27118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
27119#[inline]
27120#[target_feature(enable = "avx512f")]
27121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27122#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27123pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
27124    unsafe {
27125        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27126        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27127    }
27128}
27129
27130/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27131///
27132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
27133#[inline]
27134#[target_feature(enable = "avx512f,avx512vl")]
27135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27136#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27137pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27138    unsafe {
27139        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27140        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
27141    }
27142}
27143
27144/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27145///
27146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
27147#[inline]
27148#[target_feature(enable = "avx512f,avx512vl")]
27149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27150#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27151pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
27152    unsafe {
27153        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27154        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
27155    }
27156}
27157
27158/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27159///
27160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
27161#[inline]
27162#[target_feature(enable = "avx512f,avx512vl")]
27163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27164#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27165pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27166    unsafe {
27167        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27168        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
27169    }
27170}
27171
27172/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27173///
27174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
27175#[inline]
27176#[target_feature(enable = "avx512f,avx512vl")]
27177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27178#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27179pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
27180    unsafe {
27181        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27182        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
27183    }
27184}
27185
27186/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
27187///
27188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
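///
/// A minimal sketch (not from the original source); it assumes the `avx512f`
/// target feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(3.5); // the low element is 3.5
/// let r = _mm512_broadcastss_ps(a);
/// // all 16 elements of r are 3.5
/// ```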
27189#[inline]
27190#[target_feature(enable = "avx512f")]
27191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27192#[cfg_attr(test, assert_instr(vbroadcastss))]
27193pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
27194    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
27195}
27196
27197/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27198///
27199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
27200#[inline]
27201#[target_feature(enable = "avx512f")]
27202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27203#[cfg_attr(test, assert_instr(vbroadcastss))]
27204pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27205    unsafe {
27206        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27207        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27208    }
27209}
27210
27211/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27212///
27213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
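///
/// A sketch of the zeromask behaviour (illustrative only, not from the
/// original source); it assumes the `avx512f` target feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(1.0);
/// let r = _mm512_maskz_broadcastss_ps(0b0000_0000_1111_1111, a);
/// // The low eight elements are 1.0, the remaining eight are zeroed:
/// // r = [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
/// ```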
27214#[inline]
27215#[target_feature(enable = "avx512f")]
27216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27217#[cfg_attr(test, assert_instr(vbroadcastss))]
27218pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
27219    unsafe {
27220        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27221        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27222    }
27223}
27224
27225/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27226///
27227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
27228#[inline]
27229#[target_feature(enable = "avx512f,avx512vl")]
27230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27231#[cfg_attr(test, assert_instr(vbroadcastss))]
27232pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27233    unsafe {
27234        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27235        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27236    }
27237}
27238
27239/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27240///
27241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
27242#[inline]
27243#[target_feature(enable = "avx512f,avx512vl")]
27244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27245#[cfg_attr(test, assert_instr(vbroadcastss))]
27246pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
27247    unsafe {
27248        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27249        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27250    }
27251}
27252
27253/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27254///
27255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
27256#[inline]
27257#[target_feature(enable = "avx512f,avx512vl")]
27258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27259#[cfg_attr(test, assert_instr(vbroadcastss))]
27260pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
27261    unsafe {
27262        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27263        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
27264    }
27265}
27266
27267/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27268///
27269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
27270#[inline]
27271#[target_feature(enable = "avx512f,avx512vl")]
27272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27273#[cfg_attr(test, assert_instr(vbroadcastss))]
27274pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
27275    unsafe {
27276        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27277        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
27278    }
27279}
27280
27281/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
27282///
27283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
27284#[inline]
27285#[target_feature(enable = "avx512f")]
27286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27287#[cfg_attr(test, assert_instr(vbroadcastsd))]
27288pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
27289    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27290}
27291
27292/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27293///
27294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
27295#[inline]
27296#[target_feature(enable = "avx512f")]
27297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27298#[cfg_attr(test, assert_instr(vbroadcastsd))]
27299pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
27300    unsafe {
27301        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27302        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27303    }
27304}
27305
27306/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27307///
27308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
27309#[inline]
27310#[target_feature(enable = "avx512f")]
27311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27312#[cfg_attr(test, assert_instr(vbroadcastsd))]
27313pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
27314    unsafe {
27315        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27316        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27317    }
27318}
27319
27320/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27321///
27322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
27323#[inline]
27324#[target_feature(enable = "avx512f,avx512vl")]
27325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27326#[cfg_attr(test, assert_instr(vbroadcastsd))]
27327pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
27328    unsafe {
27329        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27330        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
27331    }
27332}
27333
27334/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27335///
27336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
27337#[inline]
27338#[target_feature(enable = "avx512f,avx512vl")]
27339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27340#[cfg_attr(test, assert_instr(vbroadcastsd))]
27341pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
27342    unsafe {
27343        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27344        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
27345    }
27346}
27347
27348/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27349///
27350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
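///
/// A minimal sketch (not from the original source); it assumes the `avx512f`
/// target feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi32(0, 1, 2, 3);
/// let r = _mm512_broadcast_i32x4(a);
/// // r = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
/// ```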
27351#[inline]
27352#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27354pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
27355    unsafe {
27356        let a = a.as_i32x4();
27357        let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
27358        transmute(ret)
27359    }
27360}
27361
27362/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27363///
27364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
27365#[inline]
27366#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27368pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27369    unsafe {
27370        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27371        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27372    }
27373}
27374
27375/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27376///
27377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
27378#[inline]
27379#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27381pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
27382    unsafe {
27383        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27384        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27385    }
27386}
27387
27388/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27389///
27390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
27391#[inline]
27392#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27394pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
27395    unsafe {
27396        let a = a.as_i32x4();
27397        let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
27398        transmute(ret)
27399    }
27400}
27401
27402/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27403///
27404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
27405#[inline]
27406#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27408pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27409    unsafe {
27410        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27411        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27412    }
27413}
27414
27415/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27416///
27417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
27418#[inline]
27419#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27421pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
27422    unsafe {
27423        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27424        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27425    }
27426}
27427
27428/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
27429///
27430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
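///
/// A minimal sketch (not from the original source); it assumes the `avx512f`
/// target feature is enabled:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let r = _mm512_broadcast_i64x4(a);
/// // r = [0, 1, 2, 3, 0, 1, 2, 3]
/// ```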
27431#[inline]
27432#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27434pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
27435    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27436}
27437
27438/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27439///
27440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
27441#[inline]
27442#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27444pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
27445    unsafe {
27446        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27447        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27448    }
27449}
27450
27451/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27452///
27453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
27454#[inline]
27455#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27457pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
27458    unsafe {
27459        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27460        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27461    }
27462}
27463
27464/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27465///
27466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
27467#[inline]
27468#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27470pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
27471    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
27472}
27473
27474/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27475///
27476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
27477#[inline]
27478#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27480pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27481    unsafe {
27482        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27483        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27484    }
27485}
27486
27487/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27488///
27489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
27490#[inline]
27491#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27493pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
27494    unsafe {
27495        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27496        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27497    }
27498}
27499
27500/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27501///
27502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
27503#[inline]
27504#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27506pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
27507    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27508}
27509
27510/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27511///
27512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
27513#[inline]
27514#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27516pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27517    unsafe {
27518        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27519        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27520    }
27521}
27522
27523/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27524///
27525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
27526#[inline]
27527#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27529pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
27530    unsafe {
27531        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27532        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27533    }
27534}
27535
27536/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
27537///
27538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
27539#[inline]
27540#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27542pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
27543    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27544}
27545
27546/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27547///
27548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
27549#[inline]
27550#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27552pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27553    unsafe {
27554        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27555        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27556    }
27557}
27558
27559/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27560///
27561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27562#[inline]
27563#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27565pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27566    unsafe {
27567        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27568        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27569    }
27570}
27571
27572/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27573///
27574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
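///
/// A minimal usage sketch (the mask and input values below are illustrative
/// assumptions): lanes whose mask bit is set take their value from `b`, the
/// remaining lanes come from `a`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(1);
/// let b = _mm512_set1_epi32(2);
/// let r = _mm512_mask_blend_epi32(0b00000000_11111111, a, b);
/// // the low eight lanes are 2 (from `b`), the high eight lanes are 1 (from `a`)
/// ```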
27575#[inline]
27576#[target_feature(enable = "avx512f")]
27577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27578#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27579pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27580    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
27581}
27582
27583/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27584///
27585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
27586#[inline]
27587#[target_feature(enable = "avx512f,avx512vl")]
27588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27589#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27590pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27591    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
27592}
27593
27594/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27595///
27596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
27597#[inline]
27598#[target_feature(enable = "avx512f,avx512vl")]
27599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27600#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27601pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27602    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
27603}
27604
27605/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27606///
27607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
27608#[inline]
27609#[target_feature(enable = "avx512f")]
27610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27611#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27612pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27613    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
27614}
27615
27616/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27617///
27618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
27619#[inline]
27620#[target_feature(enable = "avx512f,avx512vl")]
27621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27622#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27623pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27624    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
27625}
27626
27627/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27628///
27629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
27630#[inline]
27631#[target_feature(enable = "avx512f,avx512vl")]
27632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27633#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27634pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27635    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
27636}
27637
27638/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27639///
27640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
27641#[inline]
27642#[target_feature(enable = "avx512f")]
27643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27644#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27645pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27646    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
27647}
27648
27649/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27650///
27651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
27652#[inline]
27653#[target_feature(enable = "avx512f,avx512vl")]
27654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27655#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27656pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27657    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
27658}
27659
27660/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27661///
27662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
27663#[inline]
27664#[target_feature(enable = "avx512f,avx512vl")]
27665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27666#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27667pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27668    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
27669}
27670
27671/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27672///
27673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
27674#[inline]
27675#[target_feature(enable = "avx512f")]
27676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27677#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27678pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27679    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
27680}
27681
27682/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27683///
27684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
27685#[inline]
27686#[target_feature(enable = "avx512f,avx512vl")]
27687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27688#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27689pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27690    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
27691}
27692
27693/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27694///
27695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
27696#[inline]
27697#[target_feature(enable = "avx512f,avx512vl")]
27698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27699#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27700pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27701    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
27702}
27703
27704/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
27705///
27706/// <div class="warning">Only the lowest <strong>4 bits</strong> of <code>IMM8</code> are used (shift at maximum by 60 bytes)!</div>
27707///
27708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
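///
/// A minimal usage sketch (the input values and `IMM8` below are illustrative
/// assumptions): the concatenation `a:b` is shifted right by `IMM8` 32-bit
/// elements, so the low end of the result comes from `b` and the freed high
/// elements are filled from `a`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let r = _mm512_alignr_epi32::<1>(a, b);
/// // r = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1]
/// ```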
27709#[inline]
27710#[target_feature(enable = "avx512f")]
27711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27712#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27713#[rustc_legacy_const_generics(2)]
27714pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27715    unsafe {
27716        static_assert_uimm_bits!(IMM8, 8);
27717        let a = a.as_i32x16();
27718        let b = b.as_i32x16();
27719        let imm8: i32 = IMM8 % 16;
27720        let r: i32x16 = match imm8 {
27721            0 => simd_shuffle!(
27722                a,
27723                b,
27724                [
27725                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
27726                ],
27727            ),
27728            1 => simd_shuffle!(
27729                a,
27730                b,
27731                [
27732                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
27733                ],
27734            ),
27735            2 => simd_shuffle!(
27736                a,
27737                b,
27738                [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
27739            ),
27740            3 => simd_shuffle!(
27741                a,
27742                b,
27743                [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
27744            ),
27745            4 => simd_shuffle!(
27746                a,
27747                b,
27748                [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
27749            ),
27750            5 => simd_shuffle!(
27751                a,
27752                b,
27753                [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
27754            ),
27755            6 => simd_shuffle!(
27756                a,
27757                b,
27758                [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
27759            ),
27760            7 => simd_shuffle!(
27761                a,
27762                b,
27763                [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
27764            ),
27765            8 => simd_shuffle!(
27766                a,
27767                b,
27768                [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
27769            ),
27770            9 => simd_shuffle!(
27771                a,
27772                b,
27773                [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
27774            ),
27775            10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
27776            11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
27777            12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
27778            13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
27779            14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
27780            15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
27781            _ => unreachable_unchecked(),
27782        };
27783        transmute(r)
27784    }
27785}
27786
27787/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27788///
27789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
27790#[inline]
27791#[target_feature(enable = "avx512f")]
27792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27793#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27794#[rustc_legacy_const_generics(4)]
27795pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
27796    src: __m512i,
27797    k: __mmask16,
27798    a: __m512i,
27799    b: __m512i,
27800) -> __m512i {
27801    unsafe {
27802        static_assert_uimm_bits!(IMM8, 8);
27803        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27804        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
27805    }
27806}
27807
27808/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27809///
27810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
27811#[inline]
27812#[target_feature(enable = "avx512f")]
27813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27814#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27815#[rustc_legacy_const_generics(3)]
27816pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27817    unsafe {
27818        static_assert_uimm_bits!(IMM8, 8);
27819        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27820        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
27821    }
27822}
27823
27824/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
27825///
27826/// <div class="warning">Only the lowest <strong>3 bits</strong> of <code>IMM8</code> are used (shift at maximum by 28 bytes)!</div>
27827///
27828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
27829#[inline]
27830#[target_feature(enable = "avx512f,avx512vl")]
27831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27832#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27833#[rustc_legacy_const_generics(2)]
27834pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27835    unsafe {
27836        static_assert_uimm_bits!(IMM8, 8);
27837        let a = a.as_i32x8();
27838        let b = b.as_i32x8();
27839        let imm8: i32 = IMM8 % 8;
27840        let r: i32x8 = match imm8 {
27841            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27842            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27843            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27844            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27845            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27846            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27847            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27848            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27849            _ => unreachable_unchecked(),
27850        };
27851        transmute(r)
27852    }
27853}
27854
27855/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27856///
27857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
27858#[inline]
27859#[target_feature(enable = "avx512f,avx512vl")]
27860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27861#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27862#[rustc_legacy_const_generics(4)]
27863pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
27864    src: __m256i,
27865    k: __mmask8,
27866    a: __m256i,
27867    b: __m256i,
27868) -> __m256i {
27869    unsafe {
27870        static_assert_uimm_bits!(IMM8, 8);
27871        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27872        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
27873    }
27874}
27875
27876/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27877///
27878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
27879#[inline]
27880#[target_feature(enable = "avx512f,avx512vl")]
27881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27882#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27883#[rustc_legacy_const_generics(3)]
27884pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27885    unsafe {
27886        static_assert_uimm_bits!(IMM8, 8);
27887        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27888        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
27889    }
27890}
27891
27892/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
27893///
27894/// <div class="warning">Only the lowest <strong>2 bits</strong> of <code>IMM8</code> are used (shift at maximum by 12 bytes)!</div>
27895///
27896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
27897#[inline]
27898#[target_feature(enable = "avx512f,avx512vl")]
27899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27900#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
27901#[rustc_legacy_const_generics(2)]
27902pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
27903    unsafe {
27904        static_assert_uimm_bits!(IMM8, 8);
27905        let a = a.as_i32x4();
27906        let b = b.as_i32x4();
27907        let imm8: i32 = IMM8 % 4;
27908        let r: i32x4 = match imm8 {
27909            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27910            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27911            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27912            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27913            _ => unreachable_unchecked(),
27914        };
27915        transmute(r)
27916    }
27917}
27918
27919/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27920///
27921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
27922#[inline]
27923#[target_feature(enable = "avx512f,avx512vl")]
27924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27925#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27926#[rustc_legacy_const_generics(4)]
27927pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
27928    src: __m128i,
27929    k: __mmask8,
27930    a: __m128i,
27931    b: __m128i,
27932) -> __m128i {
27933    unsafe {
27934        static_assert_uimm_bits!(IMM8, 8);
27935        let r = _mm_alignr_epi32::<IMM8>(a, b);
27936        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
27937    }
27938}
27939
27940/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27941///
27942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
27943#[inline]
27944#[target_feature(enable = "avx512f,avx512vl")]
27945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27946#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27947#[rustc_legacy_const_generics(3)]
27948pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27949    unsafe {
27950        static_assert_uimm_bits!(IMM8, 8);
27951        let r = _mm_alignr_epi32::<IMM8>(a, b);
27952        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
27953    }
27954}
27955
27956/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
27957///
27958/// <div class="warning">Only the lowest <strong>3 bits</strong> of <code>IMM8</code> are used (shift at maximum by 56 bytes)!</div>
27959///
27960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
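///
/// A minimal usage sketch (the input values and `IMM8` below are illustrative
/// assumptions):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1);
/// let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// // Shift the concatenation `a:b` right by one 64-bit element.
/// let r = _mm512_alignr_epi64::<1>(a, b);
/// // r = [1, 2, 3, 4, 5, 6, 7, -1]
/// ```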
27961#[inline]
27962#[target_feature(enable = "avx512f")]
27963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27964#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27965#[rustc_legacy_const_generics(2)]
27966pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27967    unsafe {
27968        static_assert_uimm_bits!(IMM8, 8);
27969        let imm8: i32 = IMM8 % 8;
27970        let r: i64x8 = match imm8 {
27971            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27972            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27973            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27974            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27975            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27976            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27977            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27978            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27979            _ => unreachable_unchecked(),
27980        };
27981        transmute(r)
27982    }
27983}
27984
27985/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27986///
27987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
27988#[inline]
27989#[target_feature(enable = "avx512f")]
27990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27991#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27992#[rustc_legacy_const_generics(4)]
27993pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
27994    src: __m512i,
27995    k: __mmask8,
27996    a: __m512i,
27997    b: __m512i,
27998) -> __m512i {
27999    unsafe {
28000        static_assert_uimm_bits!(IMM8, 8);
28001        let r = _mm512_alignr_epi64::<IMM8>(a, b);
28002        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
28003    }
28004}
28005
28006/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28007///
28008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
28009#[inline]
28010#[target_feature(enable = "avx512f")]
28011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28012#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28013#[rustc_legacy_const_generics(3)]
28014pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28015    unsafe {
28016        static_assert_uimm_bits!(IMM8, 8);
28017        let r = _mm512_alignr_epi64::<IMM8>(a, b);
28018        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
28019    }
28020}
28021
28022/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
28023///
28024/// <div class="warning">Only the lowest <strong>2 bits</strong> of <code>IMM8</code> are used (shift at maximum by 24 bytes)!</div>
28025///
28026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
28027#[inline]
28028#[target_feature(enable = "avx512f,avx512vl")]
28029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28030#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28031#[rustc_legacy_const_generics(2)]
28032pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28033    unsafe {
28034        static_assert_uimm_bits!(IMM8, 8);
28035        let imm8: i32 = IMM8 % 4;
28036        let r: i64x4 = match imm8 {
28037            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
28038            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
28039            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
28040            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
28041            _ => unreachable_unchecked(),
28042        };
28043        transmute(r)
28044    }
28045}
28046
28047/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28048///
28049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
28050#[inline]
28051#[target_feature(enable = "avx512f,avx512vl")]
28052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28053#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28054#[rustc_legacy_const_generics(4)]
28055pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
28056    src: __m256i,
28057    k: __mmask8,
28058    a: __m256i,
28059    b: __m256i,
28060) -> __m256i {
28061    unsafe {
28062        static_assert_uimm_bits!(IMM8, 8);
28063        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28064        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
28065    }
28066}
28067
28068/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28069///
28070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
28071#[inline]
28072#[target_feature(enable = "avx512f,avx512vl")]
28073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28074#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28075#[rustc_legacy_const_generics(3)]
28076pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28077    unsafe {
28078        static_assert_uimm_bits!(IMM8, 8);
28079        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28080        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
28081    }
28082}
28083
28084/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
28085///
28086/// <div class="warning">Only the lowest <strong>bit</strong> of <code>IMM8</code> is used (shift at maximum by 8 bytes)!</div>
28087///
28088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
28089#[inline]
28090#[target_feature(enable = "avx512f,avx512vl")]
28091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28092#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
28093#[rustc_legacy_const_generics(2)]
28094pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
28095    unsafe {
28096        static_assert_uimm_bits!(IMM8, 8);
28097        let imm8: i32 = IMM8 % 2;
28098        let r: i64x2 = match imm8 {
28099            0 => simd_shuffle!(a, b, [2, 3]),
28100            1 => simd_shuffle!(a, b, [3, 0]),
28101            _ => unreachable_unchecked(),
28102        };
28103        transmute(r)
28104    }
28105}
28106
28107/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28108///
28109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
28110#[inline]
28111#[target_feature(enable = "avx512f,avx512vl")]
28112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28113#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28114#[rustc_legacy_const_generics(4)]
28115pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
28116    src: __m128i,
28117    k: __mmask8,
28118    a: __m128i,
28119    b: __m128i,
28120) -> __m128i {
28121    unsafe {
28122        static_assert_uimm_bits!(IMM8, 8);
28123        let r = _mm_alignr_epi64::<IMM8>(a, b);
28124        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
28125    }
28126}
28127
28128/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28129///
28130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
28131#[inline]
28132#[target_feature(enable = "avx512f,avx512vl")]
28133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28134#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28135#[rustc_legacy_const_generics(3)]
28136pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28137    unsafe {
28138        static_assert_uimm_bits!(IMM8, 8);
28139        let r = _mm_alignr_epi64::<IMM8>(a, b);
28140        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
28141    }
28142}
28143
28144/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
28145///
28146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
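///
/// A minimal usage sketch (the input values are illustrative assumptions):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b1100);
/// let b = _mm512_set1_epi32(0b1010);
/// let r = _mm512_and_epi32(a, b);
/// // every lane of r is 0b1000
/// ```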
28147#[inline]
28148#[target_feature(enable = "avx512f")]
28149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28150#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generates vpandq
28151pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
28152    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28153}
28154
28155/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28156///
28157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
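///
/// A minimal usage sketch of the writemask behaviour (the mask and input values
/// below are illustrative assumptions):
///
/// ```ignore
/// let src = _mm512_set1_epi32(-1);
/// let a = _mm512_set1_epi32(0b1100);
/// let b = _mm512_set1_epi32(0b1010);
/// let r = _mm512_mask_and_epi32(src, 0b00000000_00000011, a, b);
/// // lanes 0 and 1 hold 0b1000; every other lane keeps -1 from `src`
/// ```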
28158#[inline]
28159#[target_feature(enable = "avx512f")]
28160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28161#[cfg_attr(test, assert_instr(vpandd))]
28162pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28163    unsafe {
28164        let and = _mm512_and_epi32(a, b).as_i32x16();
28165        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
28166    }
28167}
28168
28169/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28170///
28171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
28172#[inline]
28173#[target_feature(enable = "avx512f")]
28174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28175#[cfg_attr(test, assert_instr(vpandd))]
28176pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28177    unsafe {
28178        let and = _mm512_and_epi32(a, b).as_i32x16();
28179        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
28180    }
28181}
28182
28183/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28184///
28185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
28186#[inline]
28187#[target_feature(enable = "avx512f,avx512vl")]
28188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28189#[cfg_attr(test, assert_instr(vpandd))]
28190pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28191    unsafe {
28192        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28193        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
28194    }
28195}
28196
28197/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28198///
28199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
28200#[inline]
28201#[target_feature(enable = "avx512f,avx512vl")]
28202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28203#[cfg_attr(test, assert_instr(vpandd))]
28204pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28205    unsafe {
28206        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28207        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
28208    }
28209}
28210
28211/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28212///
28213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
28214#[inline]
28215#[target_feature(enable = "avx512f,avx512vl")]
28216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28217#[cfg_attr(test, assert_instr(vpandd))]
28218pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28219    unsafe {
28220        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28221        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
28222    }
28223}
28224
28225/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28226///
28227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
28228#[inline]
28229#[target_feature(enable = "avx512f,avx512vl")]
28230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28231#[cfg_attr(test, assert_instr(vpandd))]
28232pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28233    unsafe {
28234        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28235        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
28236    }
28237}
28238
28239/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
28240///
28241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
28242#[inline]
28243#[target_feature(enable = "avx512f")]
28244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28245#[cfg_attr(test, assert_instr(vpandq))]
28246pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
28247    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
28248}
28249
28250/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28251///
28252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
28253#[inline]
28254#[target_feature(enable = "avx512f")]
28255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28256#[cfg_attr(test, assert_instr(vpandq))]
28257pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28258    unsafe {
28259        let and = _mm512_and_epi64(a, b).as_i64x8();
28260        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
28261    }
28262}
28263
28264/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28265///
28266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
28267#[inline]
28268#[target_feature(enable = "avx512f")]
28269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28270#[cfg_attr(test, assert_instr(vpandq))]
28271pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28272    unsafe {
28273        let and = _mm512_and_epi64(a, b).as_i64x8();
28274        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
28275    }
28276}
28277
28278/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28279///
28280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
28281#[inline]
28282#[target_feature(enable = "avx512f,avx512vl")]
28283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28284#[cfg_attr(test, assert_instr(vpandq))]
28285pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28286    unsafe {
28287        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28288        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
28289    }
28290}
28291
28292/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28293///
28294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
28295#[inline]
28296#[target_feature(enable = "avx512f,avx512vl")]
28297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28298#[cfg_attr(test, assert_instr(vpandq))]
28299pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28300    unsafe {
28301        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28302        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
28303    }
28304}
28305
28306/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28307///
28308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
28309#[inline]
28310#[target_feature(enable = "avx512f,avx512vl")]
28311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28312#[cfg_attr(test, assert_instr(vpandq))]
28313pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28314    unsafe {
28315        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28316        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
28317    }
28318}
28319
28320/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28321///
28322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
28323#[inline]
28324#[target_feature(enable = "avx512f,avx512vl")]
28325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28326#[cfg_attr(test, assert_instr(vpandq))]
28327pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28328    unsafe {
28329        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28330        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
28331    }
28332}
28333
28334/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
28335///
28336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
28337#[inline]
28338#[target_feature(enable = "avx512f")]
28339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28340#[cfg_attr(test, assert_instr(vpandq))]
28341pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
28342    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28343}
28344
28345/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28346///
28347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
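///
/// A minimal usage sketch (the input values are illustrative assumptions):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b1100);
/// let b = _mm512_set1_epi32(0b1010);
/// let r = _mm512_or_epi32(a, b);
/// // every lane of r is 0b1110
/// ```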
28348#[inline]
28349#[target_feature(enable = "avx512f")]
28350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28351#[cfg_attr(test, assert_instr(vporq))]
28352pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
28353    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28354}
28355
28356/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28357///
28358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
28359#[inline]
28360#[target_feature(enable = "avx512f")]
28361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28362#[cfg_attr(test, assert_instr(vpord))]
28363pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28364    unsafe {
28365        let or = _mm512_or_epi32(a, b).as_i32x16();
28366        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
28367    }
28368}
28369
28370/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28371///
28372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
28373#[inline]
28374#[target_feature(enable = "avx512f")]
28375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28376#[cfg_attr(test, assert_instr(vpord))]
28377pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28378    unsafe {
28379        let or = _mm512_or_epi32(a, b).as_i32x16();
28380        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
28381    }
28382}
28383
28384/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28385///
28386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
28387#[inline]
28388#[target_feature(enable = "avx512f,avx512vl")]
28389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28390#[cfg_attr(test, assert_instr(vor))] //should be vpord
28391pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
28392    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
28393}
28394
28395/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28396///
28397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
28398#[inline]
28399#[target_feature(enable = "avx512f,avx512vl")]
28400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28401#[cfg_attr(test, assert_instr(vpord))]
28402pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28403    unsafe {
28404        let or = _mm256_or_epi32(a, b).as_i32x8();
28405        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
28406    }
28407}
28408
28409/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28410///
28411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
28412#[inline]
28413#[target_feature(enable = "avx512f,avx512vl")]
28414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28415#[cfg_attr(test, assert_instr(vpord))]
28416pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28417    unsafe {
28418        let or = _mm256_or_epi32(a, b).as_i32x8();
28419        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
28420    }
28421}
28422
28423/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28424///
28425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
28426#[inline]
28427#[target_feature(enable = "avx512f,avx512vl")]
28428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28429#[cfg_attr(test, assert_instr(vor))] //should be vpord
28430pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
28431    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
28432}
28433
28434/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28435///
28436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
28437#[inline]
28438#[target_feature(enable = "avx512f,avx512vl")]
28439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28440#[cfg_attr(test, assert_instr(vpord))]
28441pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28442    unsafe {
28443        let or = _mm_or_epi32(a, b).as_i32x4();
28444        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
28445    }
28446}
28447
28448/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28449///
28450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
28451#[inline]
28452#[target_feature(enable = "avx512f,avx512vl")]
28453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28454#[cfg_attr(test, assert_instr(vpord))]
28455pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28456    unsafe {
28457        let or = _mm_or_epi32(a, b).as_i32x4();
28458        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
28459    }
28460}
28461
28462/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28463///
28464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
28465#[inline]
28466#[target_feature(enable = "avx512f")]
28467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28468#[cfg_attr(test, assert_instr(vporq))]
28469pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
28470    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
28471}
28472
28473/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28474///
28475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
28476#[inline]
28477#[target_feature(enable = "avx512f")]
28478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28479#[cfg_attr(test, assert_instr(vporq))]
28480pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28481    unsafe {
28482        let or = _mm512_or_epi64(a, b).as_i64x8();
28483        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
28484    }
28485}
28486
28487/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28488///
28489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
28490#[inline]
28491#[target_feature(enable = "avx512f")]
28492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28493#[cfg_attr(test, assert_instr(vporq))]
28494pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28495    unsafe {
28496        let or = _mm512_or_epi64(a, b).as_i64x8();
28497        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
28498    }
28499}
28500
28501/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28502///
28503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
28504#[inline]
28505#[target_feature(enable = "avx512f,avx512vl")]
28506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28507#[cfg_attr(test, assert_instr(vor))] //should be vporq
28508pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
28509    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
28510}
28511
28512/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28513///
28514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
28515#[inline]
28516#[target_feature(enable = "avx512f,avx512vl")]
28517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28518#[cfg_attr(test, assert_instr(vporq))]
28519pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28520    unsafe {
28521        let or = _mm256_or_epi64(a, b).as_i64x4();
28522        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
28523    }
28524}
28525
28526/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28527///
28528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
28529#[inline]
28530#[target_feature(enable = "avx512f,avx512vl")]
28531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28532#[cfg_attr(test, assert_instr(vporq))]
28533pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28534    unsafe {
28535        let or = _mm256_or_epi64(a, b).as_i64x4();
28536        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
28537    }
28538}
28539
28540/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28541///
28542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
28543#[inline]
28544#[target_feature(enable = "avx512f,avx512vl")]
28545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28546#[cfg_attr(test, assert_instr(vor))] //should be vporq
28547pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
28548    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
28549}
28550
28551/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28552///
28553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
28554#[inline]
28555#[target_feature(enable = "avx512f,avx512vl")]
28556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28557#[cfg_attr(test, assert_instr(vporq))]
28558pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28559    unsafe {
28560        let or = _mm_or_epi64(a, b).as_i64x2();
28561        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
28562    }
28563}
28564
28565/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28566///
28567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
28568#[inline]
28569#[target_feature(enable = "avx512f,avx512vl")]
28570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28571#[cfg_attr(test, assert_instr(vporq))]
28572pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28573    unsafe {
28574        let or = _mm_or_epi64(a, b).as_i64x2();
28575        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
28576    }
28577}
28578
28579/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
28580///
28581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
28582#[inline]
28583#[target_feature(enable = "avx512f")]
28584#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28585#[cfg_attr(test, assert_instr(vporq))]
28586pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
28587    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28588}
28589
28590/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28591///
28592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
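///
/// A minimal usage sketch (the input values are illustrative assumptions):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b1100);
/// let b = _mm512_set1_epi32(0b1010);
/// let r = _mm512_xor_epi32(a, b);
/// // every lane of r is 0b0110
/// ```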
28593#[inline]
28594#[target_feature(enable = "avx512f")]
28595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28596#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
28597pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
28598    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28599}
28600
28601/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28602///
28603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
28604#[inline]
28605#[target_feature(enable = "avx512f")]
28606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28607#[cfg_attr(test, assert_instr(vpxord))]
28608pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28609    unsafe {
28610        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28611        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
28612    }
28613}
28614
28615/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28616///
28617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
28618#[inline]
28619#[target_feature(enable = "avx512f")]
28620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28621#[cfg_attr(test, assert_instr(vpxord))]
28622pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28623    unsafe {
28624        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28625        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
28626    }
28627}
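
// Illustrative sketch (hypothetical helper, not part of the original source):
// using the masked XOR above to toggle one bit only in the lanes selected by `k`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn toggle_bit_in_selected_lanes(v: __m512i, k: __mmask16, bit: u32) -> __m512i {
    // `src` and the first operand are both `v`, so unselected lanes are copied
    // through unchanged while selected lanes become `v ^ (1 << bit)`.
    _mm512_mask_xor_epi32(v, k, v, _mm512_set1_epi32(1 << bit))
}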
28628
28629/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28630///
28631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28632#[inline]
28633#[target_feature(enable = "avx512f,avx512vl")]
28634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28635#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28636pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
28637    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28638}
28639
28640/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28641///
28642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28643#[inline]
28644#[target_feature(enable = "avx512f,avx512vl")]
28645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28646#[cfg_attr(test, assert_instr(vpxord))]
28647pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28648    unsafe {
28649        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28650        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28651    }
28652}
28653
28654/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28655///
28656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28657#[inline]
28658#[target_feature(enable = "avx512f,avx512vl")]
28659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28660#[cfg_attr(test, assert_instr(vpxord))]
28661pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28662    unsafe {
28663        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28664        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28665    }
28666}
28667
28668/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28669///
28670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28671#[inline]
28672#[target_feature(enable = "avx512f,avx512vl")]
28673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28674#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28675pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
28676    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28677}
28678
28679/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28680///
28681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28682#[inline]
28683#[target_feature(enable = "avx512f,avx512vl")]
28684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28685#[cfg_attr(test, assert_instr(vpxord))]
28686pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28687    unsafe {
28688        let xor = _mm_xor_epi32(a, b).as_i32x4();
28689        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28690    }
28691}
28692
28693/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28694///
28695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28696#[inline]
28697#[target_feature(enable = "avx512f,avx512vl")]
28698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28699#[cfg_attr(test, assert_instr(vpxord))]
28700pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28701    unsafe {
28702        let xor = _mm_xor_epi32(a, b).as_i32x4();
28703        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28704    }
28705}
28706
28707/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28708///
28709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
28710#[inline]
28711#[target_feature(enable = "avx512f")]
28712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28713#[cfg_attr(test, assert_instr(vpxorq))]
28714pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
28715    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
28716}
28717
28718/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28719///
28720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
28721#[inline]
28722#[target_feature(enable = "avx512f")]
28723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28724#[cfg_attr(test, assert_instr(vpxorq))]
28725pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28726    unsafe {
28727        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28728        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
28729    }
28730}
28731
28732/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28733///
28734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
28735#[inline]
28736#[target_feature(enable = "avx512f")]
28737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28738#[cfg_attr(test, assert_instr(vpxorq))]
28739pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28740    unsafe {
28741        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28742        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
28743    }
28744}
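
// Illustrative sketch (hypothetical helper): a masked bitwise NOT built from the
// 64-bit masked XOR above, since `x ^ !0` flips every bit of `x`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn not_selected_lanes_epi64(v: __m512i, k: __mmask8) -> __m512i {
    // Selected lanes become `!v`; unselected lanes are copied from `v` itself.
    _mm512_mask_xor_epi64(v, k, v, _mm512_set1_epi64(-1))
}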
28745
28746/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28747///
28748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
28749#[inline]
28750#[target_feature(enable = "avx512f,avx512vl")]
28751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28752#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28753pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
28754    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
28755}
28756
28757/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28758///
28759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
28760#[inline]
28761#[target_feature(enable = "avx512f,avx512vl")]
28762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28763#[cfg_attr(test, assert_instr(vpxorq))]
28764pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28765    unsafe {
28766        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28767        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
28768    }
28769}
28770
28771/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28772///
28773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
28774#[inline]
28775#[target_feature(enable = "avx512f,avx512vl")]
28776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28777#[cfg_attr(test, assert_instr(vpxorq))]
28778pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28779    unsafe {
28780        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28781        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
28782    }
28783}
28784
28785/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28786///
28787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
28788#[inline]
28789#[target_feature(enable = "avx512f,avx512vl")]
28790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28791#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28792pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
28793    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
28794}
28795
28796/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28797///
28798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
28799#[inline]
28800#[target_feature(enable = "avx512f,avx512vl")]
28801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28802#[cfg_attr(test, assert_instr(vpxorq))]
28803pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28804    unsafe {
28805        let xor = _mm_xor_epi64(a, b).as_i64x2();
28806        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
28807    }
28808}
28809
28810/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28811///
28812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
28813#[inline]
28814#[target_feature(enable = "avx512f,avx512vl")]
28815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28816#[cfg_attr(test, assert_instr(vpxorq))]
28817pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28818    unsafe {
28819        let xor = _mm_xor_epi64(a, b).as_i64x2();
28820        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
28821    }
28822}
28823
28824/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
28825///
28826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
28827#[inline]
28828#[target_feature(enable = "avx512f")]
28829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28830#[cfg_attr(test, assert_instr(vpxorq))]
28831pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
28832    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28833}
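
// Illustrative sketch (not part of the original source): the classic
// "xor a register with itself to zero it" idiom expressed with the intrinsic
// above; the result is equivalent to `_mm512_setzero_si512()`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn zero_via_xor(a: __m512i) -> __m512i {
    _mm512_xor_si512(a, a)
}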
28834
28835/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
28836///
28837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
28838#[inline]
28839#[target_feature(enable = "avx512f")]
28840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28841#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
28842pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
28843    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
28844}
28845
28846/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28847///
28848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
28849#[inline]
28850#[target_feature(enable = "avx512f")]
28851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28852#[cfg_attr(test, assert_instr(vpandnd))]
28853pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28854    unsafe {
28855        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28856        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
28857    }
28858}
28859
28860/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28861///
28862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
28863#[inline]
28864#[target_feature(enable = "avx512f")]
28865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28866#[cfg_attr(test, assert_instr(vpandnd))]
28867pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28868    unsafe {
28869        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28870        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
28871    }
28872}
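
// Illustrative sketch (hypothetical helper): ANDNOT computes `!a & b`, so passing
// the bits to clear as `a` and the value as `b` clears those bits in every lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn clear_low_byte_of_each_lane(v: __m512i) -> __m512i {
    // `!0xff & v` keeps everything except the low 8 bits of each 32-bit lane.
    _mm512_andnot_epi32(_mm512_set1_epi32(0xff), v)
}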
28873
28874/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28875///
28876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
28877#[inline]
28878#[target_feature(enable = "avx512f,avx512vl")]
28879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28880#[cfg_attr(test, assert_instr(vpandnd))]
28881pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28882    unsafe {
28883        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28884        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28885        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
28886    }
28887}
28888
28889/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28890///
28891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
28892#[inline]
28893#[target_feature(enable = "avx512f,avx512vl")]
28894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28895#[cfg_attr(test, assert_instr(vpandnd))]
28896pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28897    unsafe {
28898        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28899        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28900        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
28901    }
28902}
28903
28904/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28905///
28906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
28907#[inline]
28908#[target_feature(enable = "avx512f,avx512vl")]
28909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28910#[cfg_attr(test, assert_instr(vpandnd))]
28911pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28912    unsafe {
28913        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28914        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28915        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
28916    }
28917}
28918
28919/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28920///
28921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
28922#[inline]
28923#[target_feature(enable = "avx512f,avx512vl")]
28924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28925#[cfg_attr(test, assert_instr(vpandnd))]
28926pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28927    unsafe {
28928        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28929        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28930        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
28931    }
28932}
28933
28934/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
28935///
28936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
28937#[inline]
28938#[target_feature(enable = "avx512f")]
28939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28940#[cfg_attr(test, assert_instr(vpandnq))]
28941pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
28942    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28943}
28944
28945/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28946///
28947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
28948#[inline]
28949#[target_feature(enable = "avx512f")]
28950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28951#[cfg_attr(test, assert_instr(vpandnq))]
28952pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28953    unsafe {
28954        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28955        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
28956    }
28957}
28958
28959/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28960///
28961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
28962#[inline]
28963#[target_feature(enable = "avx512f")]
28964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28965#[cfg_attr(test, assert_instr(vpandnq))]
28966pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28967    unsafe {
28968        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28969        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
28970    }
28971}
28972
28973/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28974///
28975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
28976#[inline]
28977#[target_feature(enable = "avx512f,avx512vl")]
28978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28979#[cfg_attr(test, assert_instr(vpandnq))]
28980pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28981    unsafe {
28982        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28983        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28984        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
28985    }
28986}
28987
28988/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28989///
28990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
28991#[inline]
28992#[target_feature(enable = "avx512f,avx512vl")]
28993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28994#[cfg_attr(test, assert_instr(vpandnq))]
28995pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28996    unsafe {
28997        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28998        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28999        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
29000    }
29001}
29002
29003/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29004///
29005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
29006#[inline]
29007#[target_feature(enable = "avx512f,avx512vl")]
29008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29009#[cfg_attr(test, assert_instr(vpandnq))]
29010pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29011    unsafe {
29012        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
29013        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
29014        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
29015    }
29016}
29017
29018/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29019///
29020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
29021#[inline]
29022#[target_feature(enable = "avx512f,avx512vl")]
29023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29024#[cfg_attr(test, assert_instr(vpandnq))]
29025pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29026    unsafe {
29027        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
29028        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
29029        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
29030    }
29031}
29032
29033/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
29034///
29035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
29036#[inline]
29037#[target_feature(enable = "avx512f")]
29038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29039#[cfg_attr(test, assert_instr(vpandnq))]
29040pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
29041    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
29042}
29043
29044/// Convert 16-bit mask a into an integer value, and store the result in dst.
29045///
29046/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
29047#[inline]
29048#[target_feature(enable = "avx512f")]
29049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29050pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
29051    a as u32
29052}
29053
29054/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
29055///
29056/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
29057#[inline]
29058#[target_feature(enable = "avx512f")]
29059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29060pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
29061    a as __mmask16
29062}
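
// Illustrative sketch (not part of the original source): the two conversions above
// are plain integer casts, so converting a mask to `u32` and back is lossless.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_roundtrip(k: __mmask16) -> __mmask16 {
    _cvtu32_mask16(_cvtmask16_u32(k))
}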
29063
29064/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29065///
29066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
29067#[inline]
29068#[target_feature(enable = "avx512f")]
29069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29070#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29071pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29072    a & b
29073}
29074
29075/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29076///
29077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
29078#[inline]
29079#[target_feature(enable = "avx512f")]
29080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29081#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29082pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
29083    a & b
29084}
29085
29086/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29087///
29088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
29089#[inline]
29090#[target_feature(enable = "avx512f")]
29091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29092#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29093pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29094    a | b
29095}
29096
29097/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29098///
29099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
29100#[inline]
29101#[target_feature(enable = "avx512f")]
29102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29103#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29104pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
29105    a | b
29106}
29107
29108/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29109///
29110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
29111#[inline]
29112#[target_feature(enable = "avx512f")]
29113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29114#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29115pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29116    a ^ b
29117}
29118
29119/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29120///
29121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
29122#[inline]
29123#[target_feature(enable = "avx512f")]
29124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29125#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29126pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
29127    a ^ b
29128}
29129
29130/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29131///
29132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
29133#[inline]
29134#[target_feature(enable = "avx512f")]
29135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29136pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
29137    a ^ 0b11111111_11111111
29138}
29139
29140/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29141///
29142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
29143#[inline]
29144#[target_feature(enable = "avx512f")]
29145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29146pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
29147    a ^ 0b11111111_11111111
29148}
29149
29150/// Compute the bitwise NOT of 16-bit mask a and then AND with b, and store the result in k.
29151///
29152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
29153#[inline]
29154#[target_feature(enable = "avx512f")]
29155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29156#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29157pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29158    _mm512_kand(_mm512_knot(a), b)
29159}
29160
29161/// Compute the bitwise NOT of 16-bit mask a and then AND with b, and store the result in k.
29162///
29163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
29164#[inline]
29165#[target_feature(enable = "avx512f")]
29166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29167#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29168pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
29169    _mm512_kand(_mm512_knot(a), b)
29170}
29171
29172/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29173///
29174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
29175#[inline]
29176#[target_feature(enable = "avx512f")]
29177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29178#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29179pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29180    _mm512_knot(_mm512_kxor(a, b))
29181}
29182
29183/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29184///
29185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
29186#[inline]
29187#[target_feature(enable = "avx512f")]
29188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29189#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29190pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
29191    _mm512_knot(_mm512_kxor(a, b))
29192}
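
// Illustrative sketch (hypothetical helper): composing the mask operations above.
// "Lanes set in `a` but not in `b`" is `a & !b`, i.e. `_kandn_mask16(b, a)`,
// because the ANDNOT forms negate their first operand.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn in_a_but_not_b(a: __mmask16, b: __mmask16) -> __mmask16 {
    _kandn_mask16(b, a)
}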
29193
29194/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29195/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
29196///
29197/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
29198#[inline]
29199#[target_feature(enable = "avx512f")]
29200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29201pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
29202    let tmp = _kor_mask16(a, b);
29203    *all_ones = (tmp == 0xffff) as u8;
29204    (tmp == 0) as u8
29205}
29206
29207/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
29208/// store 0 in dst.
29209///
29210/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
29211#[inline]
29212#[target_feature(enable = "avx512f")]
29213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29214pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29215    (_kor_mask16(a, b) == 0xffff) as u8
29216}
29217
29218/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29219/// store 0 in dst.
29220///
29221/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
29222#[inline]
29223#[target_feature(enable = "avx512f")]
29224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29225pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29226    (_kor_mask16(a, b) == 0) as u8
29227}
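
// Illustrative sketch (hypothetical helper): `_kortestz_mask16_u8` reports whether
// the OR of two masks is all zeros, which makes a cheap "is any lane selected?" check.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn any_lane_selected(a: __mmask16, b: __mmask16) -> bool {
    // The intrinsic returns 1 when `a | b` is zero, so invert it for "any set".
    _kortestz_mask16_u8(a, b) == 0
}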
29228
29229/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
29230///
29231/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
29232#[inline]
29233#[target_feature(enable = "avx512f")]
29234#[rustc_legacy_const_generics(1)]
29235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29236pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29237    a << COUNT
29238}
29239
29240/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
29241///
29242/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
29243#[inline]
29244#[target_feature(enable = "avx512f")]
29245#[rustc_legacy_const_generics(1)]
29246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29247pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29248    a >> COUNT
29249}
29250
29251/// Load 16-bit mask from memory.
29252///
29253/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
29254#[inline]
29255#[target_feature(enable = "avx512f")]
29256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29257pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
29258    *mem_addr
29259}
29260
29261/// Store 16-bit mask to memory.
29262///
29263/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
29264#[inline]
29265#[target_feature(enable = "avx512f")]
29266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29267pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
29268    *mem_addr = a;
29269}
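
// Illustrative sketch (hypothetical helper): round-tripping a mask through memory
// with the load/store intrinsics above. The raw-pointer accesses make this unsafe;
// the caller must pass a valid, writable location.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn spill_and_reload_mask(slot: *mut __mmask16, k: __mmask16) -> __mmask16 {
    _store_mask16(slot, k);
    _load_mask16(slot)
}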
29270
29271/// Copy 16-bit mask a to k.
29272///
29273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
29274#[inline]
29275#[target_feature(enable = "avx512f")]
29276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29277#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29278pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
29279    a
29280}
29281
29282/// Converts integer mask into bitmask, storing the result in dst.
29283///
29284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
29285#[inline]
29286#[target_feature(enable = "avx512f")] // generate normal and code instead of kmovw
29287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29288pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
29289    mask as u16
29290}
29291
29292/// Converts bit mask k1 into an integer value, storing the result in dst.
29293///
29294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
29295#[inline]
29296#[target_feature(enable = "avx512f")]
29297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29298#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29299pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
29300    k1 as i32
29301}
29302
29303/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
29304///
29305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
29306#[inline]
29307#[target_feature(enable = "avx512f")]
29308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29309#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
29310pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
29311    ((a & 0xff) << 8) | (b & 0xff)
29312}
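
// Illustrative sketch (hypothetical helper): building one 16-lane mask from two
// 8-lane halves with `_mm512_kunpackb`; `hi` ends up controlling lanes 8..16 and
// `lo` lanes 0..8.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn combine_mask_halves(hi: __mmask8, lo: __mmask8) -> __mmask16 {
    _mm512_kunpackb(hi as __mmask16, lo as __mmask16)
}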
29313
29314/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
29315///
29316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
29317#[inline]
29318#[target_feature(enable = "avx512f")]
29319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29320#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
29321pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
29322    let r = (a | b) == 0b11111111_11111111;
29323    r as i32
29324}
29325
29326/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
29327///
29328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
29329#[inline]
29330#[target_feature(enable = "avx512f")]
29331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29332#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
29333pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
29334    let r = (a | b) == 0;
29335    r as i32
29336}
29337
29338/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29339///
29340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
29341#[inline]
29342#[target_feature(enable = "avx512f")]
29343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29344#[cfg_attr(test, assert_instr(vptestmd))]
29345pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29346    let and = _mm512_and_epi32(a, b);
29347    let zero = _mm512_setzero_si512();
29348    _mm512_cmpneq_epi32_mask(and, zero)
29349}
29350
29351/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29352///
29353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
29354#[inline]
29355#[target_feature(enable = "avx512f")]
29356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29357#[cfg_attr(test, assert_instr(vptestmd))]
29358pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29359    let and = _mm512_and_epi32(a, b);
29360    let zero = _mm512_setzero_si512();
29361    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
29362}
29363
29364/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29365///
29366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
29367#[inline]
29368#[target_feature(enable = "avx512f,avx512vl")]
29369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29370#[cfg_attr(test, assert_instr(vptestmd))]
29371pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29372    let and = _mm256_and_si256(a, b);
29373    let zero = _mm256_setzero_si256();
29374    _mm256_cmpneq_epi32_mask(and, zero)
29375}
29376
29377/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29378///
29379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
29380#[inline]
29381#[target_feature(enable = "avx512f,avx512vl")]
29382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29383#[cfg_attr(test, assert_instr(vptestmd))]
29384pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29385    let and = _mm256_and_si256(a, b);
29386    let zero = _mm256_setzero_si256();
29387    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
29388}
29389
29390/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29391///
29392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
29393#[inline]
29394#[target_feature(enable = "avx512f,avx512vl")]
29395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29396#[cfg_attr(test, assert_instr(vptestmd))]
29397pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29398    let and = _mm_and_si128(a, b);
29399    let zero = _mm_setzero_si128();
29400    _mm_cmpneq_epi32_mask(and, zero)
29401}
29402
29403/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29404///
29405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
29406#[inline]
29407#[target_feature(enable = "avx512f,avx512vl")]
29408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29409#[cfg_attr(test, assert_instr(vptestmd))]
29410pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29411    let and = _mm_and_si128(a, b);
29412    let zero = _mm_setzero_si128();
29413    _mm_mask_cmpneq_epi32_mask(k, and, zero)
29414}
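
// Illustrative sketch (hypothetical helper): using the TEST intrinsics above to ask
// "which 32-bit lanes of `v` have a given bit set?" without a separate compare.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn lanes_with_bit_set(v: __m512i, bit: u32) -> __mmask16 {
    // A lane's mask bit is set when `v & (1 << bit)` is non-zero.
    _mm512_test_epi32_mask(v, _mm512_set1_epi32(1 << bit))
}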
29415
29416/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29417///
29418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29419#[inline]
29420#[target_feature(enable = "avx512f")]
29421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29422#[cfg_attr(test, assert_instr(vptestmq))]
29423pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29424    let and = _mm512_and_epi64(a, b);
29425    let zero = _mm512_setzero_si512();
29426    _mm512_cmpneq_epi64_mask(and, zero)
29427}
29428
29429/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29430///
29431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29432#[inline]
29433#[target_feature(enable = "avx512f")]
29434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29435#[cfg_attr(test, assert_instr(vptestmq))]
29436pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29437    let and = _mm512_and_epi64(a, b);
29438    let zero = _mm512_setzero_si512();
29439    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29440}
29441
29442/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29443///
29444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29445#[inline]
29446#[target_feature(enable = "avx512f,avx512vl")]
29447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29448#[cfg_attr(test, assert_instr(vptestmq))]
29449pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29450    let and = _mm256_and_si256(a, b);
29451    let zero = _mm256_setzero_si256();
29452    _mm256_cmpneq_epi64_mask(and, zero)
29453}
29454
29455/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29456///
29457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29458#[inline]
29459#[target_feature(enable = "avx512f,avx512vl")]
29460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29461#[cfg_attr(test, assert_instr(vptestmq))]
29462pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29463    let and = _mm256_and_si256(a, b);
29464    let zero = _mm256_setzero_si256();
29465    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29466}
29467
29468/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29469///
29470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29471#[inline]
29472#[target_feature(enable = "avx512f,avx512vl")]
29473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29474#[cfg_attr(test, assert_instr(vptestmq))]
29475pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29476    let and = _mm_and_si128(a, b);
29477    let zero = _mm_setzero_si128();
29478    _mm_cmpneq_epi64_mask(and, zero)
29479}
29480
29481/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29482///
29483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29484#[inline]
29485#[target_feature(enable = "avx512f,avx512vl")]
29486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29487#[cfg_attr(test, assert_instr(vptestmq))]
29488pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29489    let and = _mm_and_si128(a, b);
29490    let zero = _mm_setzero_si128();
29491    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29492}
29493
29494/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29495///
29496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
29497#[inline]
29498#[target_feature(enable = "avx512f")]
29499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29500#[cfg_attr(test, assert_instr(vptestnmd))]
29501pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29502    let and = _mm512_and_epi32(a, b);
29503    let zero = _mm512_setzero_si512();
29504    _mm512_cmpeq_epi32_mask(and, zero)
29505}
29506
29507/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29508///
29509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29510#[inline]
29511#[target_feature(enable = "avx512f")]
29512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29513#[cfg_attr(test, assert_instr(vptestnmd))]
29514pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29515    let and = _mm512_and_epi32(a, b);
29516    let zero = _mm512_setzero_si512();
29517    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29518}
29519
29520/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29521///
29522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29523#[inline]
29524#[target_feature(enable = "avx512f,avx512vl")]
29525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29526#[cfg_attr(test, assert_instr(vptestnmd))]
29527pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29528    let and = _mm256_and_si256(a, b);
29529    let zero = _mm256_setzero_si256();
29530    _mm256_cmpeq_epi32_mask(and, zero)
29531}
29532
29533/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29534///
29535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29536#[inline]
29537#[target_feature(enable = "avx512f,avx512vl")]
29538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29539#[cfg_attr(test, assert_instr(vptestnmd))]
29540pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29541    let and = _mm256_and_si256(a, b);
29542    let zero = _mm256_setzero_si256();
29543    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29544}
29545
29546/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29547///
29548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29549#[inline]
29550#[target_feature(enable = "avx512f,avx512vl")]
29551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29552#[cfg_attr(test, assert_instr(vptestnmd))]
29553pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29554    let and = _mm_and_si128(a, b);
29555    let zero = _mm_setzero_si128();
29556    _mm_cmpeq_epi32_mask(and, zero)
29557}
29558
29559/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29560///
29561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29562#[inline]
29563#[target_feature(enable = "avx512f,avx512vl")]
29564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29565#[cfg_attr(test, assert_instr(vptestnmd))]
29566pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29567    let and = _mm_and_si128(a, b);
29568    let zero = _mm_setzero_si128();
29569    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29570}
29571
29572/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29573///
29574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29575#[inline]
29576#[target_feature(enable = "avx512f")]
29577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29578#[cfg_attr(test, assert_instr(vptestnmq))]
29579pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29580    let and = _mm512_and_epi64(a, b);
29581    let zero = _mm512_setzero_si512();
29582    _mm512_cmpeq_epi64_mask(and, zero)
29583}
29584
29585/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29586///
29587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
29588#[inline]
29589#[target_feature(enable = "avx512f")]
29590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29591#[cfg_attr(test, assert_instr(vptestnmq))]
29592pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29593    let and = _mm512_and_epi64(a, b);
29594    let zero = _mm512_setzero_si512();
29595    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
29596}
29597
29598/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29599///
29600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
29601#[inline]
29602#[target_feature(enable = "avx512f,avx512vl")]
29603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29604#[cfg_attr(test, assert_instr(vptestnmq))]
29605pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29606    let and = _mm256_and_si256(a, b);
29607    let zero = _mm256_setzero_si256();
29608    _mm256_cmpeq_epi64_mask(and, zero)
29609}
29610
29611/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29612///
29613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
29614#[inline]
29615#[target_feature(enable = "avx512f,avx512vl")]
29616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29617#[cfg_attr(test, assert_instr(vptestnmq))]
29618pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29619    let and = _mm256_and_si256(a, b);
29620    let zero = _mm256_setzero_si256();
29621    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
29622}
29623
29624/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29625///
29626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
29627#[inline]
29628#[target_feature(enable = "avx512f,avx512vl")]
29629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29630#[cfg_attr(test, assert_instr(vptestnmq))]
29631pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29632    let and = _mm_and_si128(a, b);
29633    let zero = _mm_setzero_si128();
29634    _mm_cmpeq_epi64_mask(and, zero)
29635}
29636
29637/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29638///
29639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
29640#[inline]
29641#[target_feature(enable = "avx512f,avx512vl")]
29642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29643#[cfg_attr(test, assert_instr(vptestnmq))]
29644pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29645    let and = _mm_and_si128(a, b);
29646    let zero = _mm_setzero_si128();
29647    _mm_mask_cmpeq_epi64_mask(k, and, zero)
29648}
29649
29650/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29651///
29652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
29653///
29654/// # Safety of non-temporal stores
29655///
29656/// After using this intrinsic, but before any other access to the memory that this intrinsic
29657/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29658/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29659/// return.
29660///
29661/// See [`_mm_sfence`] for details.
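///
/// # Examples
///
/// A minimal sketch of the fencing discipline described above (illustrative
/// only, assuming `avx512f` is available and `mem_addr` is 64-byte aligned):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// unsafe fn streaming_store(mem_addr: *mut f32, a: __m512) {
///     _mm512_stream_ps(mem_addr, a);
///     // make the non-temporal store globally visible before any other access
///     _mm_sfence();
/// }
/// ```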
29662#[inline]
29663#[target_feature(enable = "avx512f")]
29664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29665#[cfg_attr(test, assert_instr(vmovntps))]
29666#[allow(clippy::cast_ptr_alignment)]
29667pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
29668    crate::arch::asm!(
29669        vps!("vmovntps", ",{a}"),
29670        p = in(reg) mem_addr,
29671        a = in(zmm_reg) a,
29672        options(nostack, preserves_flags),
29673    );
29674}
29675
29676/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29677///
29678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
29679///
29680/// # Safety of non-temporal stores
29681///
29682/// After using this intrinsic, but before any other access to the memory that this intrinsic
29683/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29684/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29685/// return.
29686///
29687/// See [`_mm_sfence`] for details.
29688#[inline]
29689#[target_feature(enable = "avx512f")]
29690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29691#[cfg_attr(test, assert_instr(vmovntpd))]
29692#[allow(clippy::cast_ptr_alignment)]
29693pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
29694    crate::arch::asm!(
29695        vps!("vmovntpd", ",{a}"),
29696        p = in(reg) mem_addr,
29697        a = in(zmm_reg) a,
29698        options(nostack, preserves_flags),
29699    );
29700}
29701
29702/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29703///
29704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
29705///
29706/// # Safety of non-temporal stores
29707///
29708/// After using this intrinsic, but before any other access to the memory that this intrinsic
29709/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29710/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29711/// return.
29712///
29713/// See [`_mm_sfence`] for details.
29714#[inline]
29715#[target_feature(enable = "avx512f")]
29716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29717#[cfg_attr(test, assert_instr(vmovntdq))]
29718#[allow(clippy::cast_ptr_alignment)]
29719pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
29720    crate::arch::asm!(
29721        vps!("vmovntdq", ",{a}"),
29722        p = in(reg) mem_addr,
29723        a = in(zmm_reg) a,
29724        options(nostack, preserves_flags),
29725    );
29726}
29727
29728/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
29729/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
29731///
29732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
29733#[inline]
29734#[target_feature(enable = "avx512f")]
29735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29736pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
29737    let dst: __m512i;
29738    crate::arch::asm!(
29739        vpl!("vmovntdqa {a}"),
29740        a = out(zmm_reg) dst,
29741        p = in(reg) mem_addr,
29742        options(pure, readonly, nostack, preserves_flags),
29743    );
29744    dst
29745}
29746
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with
/// the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
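///
/// # Examples
///
/// A minimal sketch (illustrative only): the first argument ends up in the
/// highest element and the last argument in element 0.
///
/// ```ignore
/// unsafe {
///     let v = _mm512_set_ps(
///         15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
///     );
///     // element 0 holds the last argument
///     assert_eq!(_mm512_cvtss_f32(v), 0.0);
/// }
/// ```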
29750#[inline]
29751#[target_feature(enable = "avx512f")]
29752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29753pub fn _mm512_set_ps(
29754    e0: f32,
29755    e1: f32,
29756    e2: f32,
29757    e3: f32,
29758    e4: f32,
29759    e5: f32,
29760    e6: f32,
29761    e7: f32,
29762    e8: f32,
29763    e9: f32,
29764    e10: f32,
29765    e11: f32,
29766    e12: f32,
29767    e13: f32,
29768    e14: f32,
29769    e15: f32,
29770) -> __m512 {
29771    _mm512_setr_ps(
29772        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
29773    )
29774}
29775
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with
/// the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
29780#[inline]
29781#[target_feature(enable = "avx512f")]
29782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29783pub fn _mm512_setr_ps(
29784    e0: f32,
29785    e1: f32,
29786    e2: f32,
29787    e3: f32,
29788    e4: f32,
29789    e5: f32,
29790    e6: f32,
29791    e7: f32,
29792    e8: f32,
29793    e9: f32,
29794    e10: f32,
29795    e11: f32,
29796    e12: f32,
29797    e13: f32,
29798    e14: f32,
29799    e15: f32,
29800) -> __m512 {
29801    unsafe {
29802        let r = f32x16::new(
29803            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29804        );
29805        transmute(r)
29806    }
29807}
29808
29809/// Broadcast 64-bit float `a` to all elements of `dst`.
29810///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
29812#[inline]
29813#[target_feature(enable = "avx512f")]
29814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29815pub fn _mm512_set1_pd(a: f64) -> __m512d {
29816    unsafe { transmute(f64x8::splat(a)) }
29817}
29818
29819/// Broadcast 32-bit float `a` to all elements of `dst`.
29820///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
29822#[inline]
29823#[target_feature(enable = "avx512f")]
29824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29825pub fn _mm512_set1_ps(a: f32) -> __m512 {
29826    unsafe { transmute(f32x16::splat(a)) }
29827}
29828
29829/// Sets packed 32-bit integers in `dst` with the supplied values.
29830///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
29832#[inline]
29833#[target_feature(enable = "avx512f")]
29834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29835pub fn _mm512_set_epi32(
29836    e15: i32,
29837    e14: i32,
29838    e13: i32,
29839    e12: i32,
29840    e11: i32,
29841    e10: i32,
29842    e9: i32,
29843    e8: i32,
29844    e7: i32,
29845    e6: i32,
29846    e5: i32,
29847    e4: i32,
29848    e3: i32,
29849    e2: i32,
29850    e1: i32,
29851    e0: i32,
29852) -> __m512i {
29853    _mm512_setr_epi32(
29854        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29855    )
29856}
29857
29858/// Broadcast 8-bit integer a to all elements of dst.
29859///
29860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
29861#[inline]
29862#[target_feature(enable = "avx512f")]
29863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29864pub fn _mm512_set1_epi8(a: i8) -> __m512i {
29865    unsafe { transmute(i8x64::splat(a)) }
29866}
29867
/// Broadcast 16-bit integer a to all elements of dst.
29869///
29870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
29871#[inline]
29872#[target_feature(enable = "avx512f")]
29873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29874pub fn _mm512_set1_epi16(a: i16) -> __m512i {
29875    unsafe { transmute(i16x32::splat(a)) }
29876}
29877
29878/// Broadcast 32-bit integer `a` to all elements of `dst`.
29879///
29880/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
29881#[inline]
29882#[target_feature(enable = "avx512f")]
29883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29884pub fn _mm512_set1_epi32(a: i32) -> __m512i {
29885    unsafe { transmute(i32x16::splat(a)) }
29886}
29887
29888/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29889///
29890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
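///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available): lanes
/// whose mask bit is clear keep the corresponding value from `src`.
///
/// ```ignore
/// unsafe {
///     let src = _mm512_set1_epi32(-1);
///     // broadcast 7 into the low eight lanes only
///     let r = _mm512_mask_set1_epi32(src, 0x00ff, 7);
///     // lane 0 is selected by the mask, so it holds the broadcast value
///     assert_eq!(_mm512_cvtsi512_si32(r), 7);
/// }
/// ```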
29891#[inline]
29892#[target_feature(enable = "avx512f")]
29893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29894#[cfg_attr(test, assert_instr(vpbroadcastd))]
29895pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
29896    unsafe {
29897        let r = _mm512_set1_epi32(a).as_i32x16();
29898        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29899    }
29900}
29901
29902/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29903///
29904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
29905#[inline]
29906#[target_feature(enable = "avx512f")]
29907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29908#[cfg_attr(test, assert_instr(vpbroadcastd))]
29909pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
29910    unsafe {
29911        let r = _mm512_set1_epi32(a).as_i32x16();
29912        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
29913    }
29914}
29915
29916/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29917///
29918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
29919#[inline]
29920#[target_feature(enable = "avx512f,avx512vl")]
29921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29922#[cfg_attr(test, assert_instr(vpbroadcastd))]
29923pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
29924    unsafe {
29925        let r = _mm256_set1_epi32(a).as_i32x8();
29926        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
29927    }
29928}
29929
29930/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29931///
29932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
29933#[inline]
29934#[target_feature(enable = "avx512f,avx512vl")]
29935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29936#[cfg_attr(test, assert_instr(vpbroadcastd))]
29937pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
29938    unsafe {
29939        let r = _mm256_set1_epi32(a).as_i32x8();
29940        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
29941    }
29942}
29943
29944/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29945///
29946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
29947#[inline]
29948#[target_feature(enable = "avx512f,avx512vl")]
29949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29950#[cfg_attr(test, assert_instr(vpbroadcastd))]
29951pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
29952    unsafe {
29953        let r = _mm_set1_epi32(a).as_i32x4();
29954        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
29955    }
29956}
29957
29958/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29959///
29960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
29961#[inline]
29962#[target_feature(enable = "avx512f,avx512vl")]
29963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29964#[cfg_attr(test, assert_instr(vpbroadcastd))]
29965pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
29966    unsafe {
29967        let r = _mm_set1_epi32(a).as_i32x4();
29968        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
29969    }
29970}
29971
29972/// Broadcast 64-bit integer `a` to all elements of `dst`.
29973///
29974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
29975#[inline]
29976#[target_feature(enable = "avx512f")]
29977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29978pub fn _mm512_set1_epi64(a: i64) -> __m512i {
29979    unsafe { transmute(i64x8::splat(a)) }
29980}
29981
29982/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29983///
29984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
29985#[inline]
29986#[target_feature(enable = "avx512f")]
29987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29988#[cfg_attr(test, assert_instr(vpbroadcastq))]
29989pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
29990    unsafe {
29991        let r = _mm512_set1_epi64(a).as_i64x8();
29992        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
29993    }
29994}
29995
29996/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29997///
29998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
29999#[inline]
30000#[target_feature(enable = "avx512f")]
30001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30002#[cfg_attr(test, assert_instr(vpbroadcastq))]
30003pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
30004    unsafe {
30005        let r = _mm512_set1_epi64(a).as_i64x8();
30006        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
30007    }
30008}
30009
30010/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30011///
30012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
30013#[inline]
30014#[target_feature(enable = "avx512f,avx512vl")]
30015#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30016#[cfg_attr(test, assert_instr(vpbroadcastq))]
30017pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
30018    unsafe {
30019        let r = _mm256_set1_epi64x(a).as_i64x4();
30020        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
30021    }
30022}
30023
30024/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30025///
30026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
30027#[inline]
30028#[target_feature(enable = "avx512f,avx512vl")]
30029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30030#[cfg_attr(test, assert_instr(vpbroadcastq))]
30031pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
30032    unsafe {
30033        let r = _mm256_set1_epi64x(a).as_i64x4();
30034        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
30035    }
30036}
30037
30038/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30039///
30040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
30041#[inline]
30042#[target_feature(enable = "avx512f,avx512vl")]
30043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30044#[cfg_attr(test, assert_instr(vpbroadcastq))]
30045pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
30046    unsafe {
30047        let r = _mm_set1_epi64x(a).as_i64x2();
30048        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
30049    }
30050}
30051
30052/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30053///
30054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
30055#[inline]
30056#[target_feature(enable = "avx512f,avx512vl")]
30057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30058#[cfg_attr(test, assert_instr(vpbroadcastq))]
30059pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
30060    unsafe {
30061        let r = _mm_set1_epi64x(a).as_i64x2();
30062        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
30063    }
30064}
30065
30066/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
30067///
30068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
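///
/// # Examples
///
/// A minimal sketch (illustrative only): the four arguments are repeated across
/// the eight 64-bit lanes, with `a` in the lowest lane.
///
/// ```ignore
/// unsafe {
///     let v = _mm512_set4_epi64(4, 3, 2, 1);
///     let expected = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
///     assert_eq!(_mm512_cmpeq_epi64_mask(v, expected), 0xff);
/// }
/// ```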
30069#[inline]
30070#[target_feature(enable = "avx512f")]
30071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30072pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30073    _mm512_set_epi64(d, c, b, a, d, c, b, a)
30074}
30075
30076/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
30077///
30078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
30079#[inline]
30080#[target_feature(enable = "avx512f")]
30081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30082pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30083    _mm512_set_epi64(a, b, c, d, a, b, c, d)
30084}
30085
30086/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30087///
30088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
30089#[inline]
30090#[target_feature(enable = "avx512f")]
30091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30092#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30093pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30094    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
30095}
30096
30097/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30098///
30099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
30100#[inline]
30101#[target_feature(enable = "avx512f")]
30102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30103#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30104pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30105    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
30106}
30107
30108/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30109///
30110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
30111#[inline]
30112#[target_feature(enable = "avx512f")]
30113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30114#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30115pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30116    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
30117}
30118
30119/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30120///
30121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
30122#[inline]
30123#[target_feature(enable = "avx512f")]
30124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30125#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30126pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30127    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
30128}
30129
30130/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30131///
30132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
30133#[inline]
30134#[target_feature(enable = "avx512f")]
30135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30136#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30137pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30138    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
30139}
30140
30141/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30142///
30143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
30144#[inline]
30145#[target_feature(enable = "avx512f")]
30146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30147#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30148pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30149    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
30150}
30151
30152/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30153///
30154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
30155#[inline]
30156#[target_feature(enable = "avx512f")]
30157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30158#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30159pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30160    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
30161}
30162
30163/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30164///
30165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
30166#[inline]
30167#[target_feature(enable = "avx512f")]
30168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30169#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30170pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30171    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
30172}
30173
30174/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30175///
30176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
30177#[inline]
30178#[target_feature(enable = "avx512f")]
30179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30180#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30181pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30182    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
30183}
30184
30185/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30186///
30187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
30188#[inline]
30189#[target_feature(enable = "avx512f")]
30190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30191#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30192pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30193    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
30194}
30195
30196/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30197///
30198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
30199#[inline]
30200#[target_feature(enable = "avx512f")]
30201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30202#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30203pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30204    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
30205}
30206
30207/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30208///
30209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
30210#[inline]
30211#[target_feature(enable = "avx512f")]
30212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30213#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30214pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30215    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
30216}
30217
30218/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30219///
30220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
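///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available): with
/// the `_CMP_LT_OS` predicate this behaves like `_mm512_cmplt_ps_mask`.
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // 1.0 < 2.0 in every lane, so all 16 mask bits are set
///     assert_eq!(_mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b), 0xffff);
/// }
/// ```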
30221#[inline]
30222#[target_feature(enable = "avx512f")]
30223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30224#[rustc_legacy_const_generics(2)]
30225#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30226pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
30227    unsafe {
30228        static_assert_uimm_bits!(IMM8, 5);
30229        let neg_one = -1;
30230        let a = a.as_f32x16();
30231        let b = b.as_f32x16();
30232        let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30233        r.cast_unsigned()
30234    }
30235}
30236
30237/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30238///
30239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
30240#[inline]
30241#[target_feature(enable = "avx512f")]
30242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30243#[rustc_legacy_const_generics(3)]
30244#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30245pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30246    unsafe {
30247        static_assert_uimm_bits!(IMM8, 5);
30248        let a = a.as_f32x16();
30249        let b = b.as_f32x16();
30250        let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
30251        r.cast_unsigned()
30252    }
30253}
30254
30255/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30256///
30257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
30258#[inline]
30259#[target_feature(enable = "avx512f,avx512vl")]
30260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30261#[rustc_legacy_const_generics(2)]
30262#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30263pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
30264    unsafe {
30265        static_assert_uimm_bits!(IMM8, 5);
30266        let neg_one = -1;
30267        let a = a.as_f32x8();
30268        let b = b.as_f32x8();
30269        let r = vcmpps256(a, b, IMM8, neg_one);
30270        r.cast_unsigned()
30271    }
30272}
30273
30274/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30275///
30276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
30277#[inline]
30278#[target_feature(enable = "avx512f,avx512vl")]
30279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30280#[rustc_legacy_const_generics(3)]
30281#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30282pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
30283    unsafe {
30284        static_assert_uimm_bits!(IMM8, 5);
30285        let a = a.as_f32x8();
30286        let b = b.as_f32x8();
30287        let r = vcmpps256(a, b, IMM8, k1 as i8);
30288        r.cast_unsigned()
30289    }
30290}
30291
30292/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30293///
30294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
30295#[inline]
30296#[target_feature(enable = "avx512f,avx512vl")]
30297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30298#[rustc_legacy_const_generics(2)]
30299#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30300pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30301    unsafe {
30302        static_assert_uimm_bits!(IMM8, 5);
30303        let neg_one = -1;
30304        let a = a.as_f32x4();
30305        let b = b.as_f32x4();
30306        let r = vcmpps128(a, b, IMM8, neg_one);
30307        r.cast_unsigned()
30308    }
30309}
30310
30311/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30312///
30313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
30314#[inline]
30315#[target_feature(enable = "avx512f,avx512vl")]
30316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30317#[rustc_legacy_const_generics(3)]
30318#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30319pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30320    unsafe {
30321        static_assert_uimm_bits!(IMM8, 5);
30322        let a = a.as_f32x4();
30323        let b = b.as_f32x4();
30324        let r = vcmpps128(a, b, IMM8, k1 as i8);
30325        r.cast_unsigned()
30326    }
30327}
30328
30329/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30330/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30331///
30332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
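///
/// # Examples
///
/// A minimal sketch (illustrative only): `_MM_FROUND_NO_EXC` suppresses
/// floating-point exceptions while comparing for unordered not-equal.
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(f32::NAN);
///     let b = _mm512_set1_ps(1.0);
///     let k = _mm512_cmp_round_ps_mask::<_CMP_NEQ_UQ, _MM_FROUND_NO_EXC>(a, b);
///     // NaN is unordered, so the not-equal (unordered) predicate holds in every lane
///     assert_eq!(k, 0xffff);
/// }
/// ```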
30333#[inline]
30334#[target_feature(enable = "avx512f")]
30335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30336#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30337#[rustc_legacy_const_generics(2, 3)]
30338pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30339    a: __m512,
30340    b: __m512,
30341) -> __mmask16 {
30342    unsafe {
30343        static_assert_uimm_bits!(IMM5, 5);
30344        static_assert_mantissas_sae!(SAE);
30345        let neg_one = -1;
30346        let a = a.as_f32x16();
30347        let b = b.as_f32x16();
30348        let r = vcmpps(a, b, IMM5, neg_one, SAE);
30349        r.cast_unsigned()
30350    }
30351}
30352
30353/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30354/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30355///
30356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
30357#[inline]
30358#[target_feature(enable = "avx512f")]
30359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30360#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30361#[rustc_legacy_const_generics(3, 4)]
30362pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30363    m: __mmask16,
30364    a: __m512,
30365    b: __m512,
30366) -> __mmask16 {
30367    unsafe {
30368        static_assert_uimm_bits!(IMM5, 5);
30369        static_assert_mantissas_sae!(SAE);
30370        let a = a.as_f32x16();
30371        let b = b.as_f32x16();
30372        let r = vcmpps(a, b, IMM5, m as i16, SAE);
30373        r.cast_unsigned()
30374    }
30375}
30376
30377/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30378///
30379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
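///
/// # Examples
///
/// A minimal sketch (illustrative only): a lane is ordered only when neither
/// operand is NaN.
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(f32::NAN);
///     let b = _mm512_set1_ps(1.0);
///     // every lane of `a` is NaN, so no lane is ordered
///     assert_eq!(_mm512_cmpord_ps_mask(a, b), 0);
/// }
/// ```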
30380#[inline]
30381#[target_feature(enable = "avx512f")]
30382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30384pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30385    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
30386}
30387
30388/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30389///
30390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
30391#[inline]
30392#[target_feature(enable = "avx512f")]
30393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30394#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30395pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30396    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
30397}
30398
30399/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30400///
30401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
30402#[inline]
30403#[target_feature(enable = "avx512f")]
30404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30405#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30406pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30407    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
30408}
30409
30410/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30411///
30412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
30413#[inline]
30414#[target_feature(enable = "avx512f")]
30415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30416#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30417pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30418    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
30419}
30420
30421/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30422///
30423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30427#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30428pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30429    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
30430}
30431
30432/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
30435#[inline]
30436#[target_feature(enable = "avx512f")]
30437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30438#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30439pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30440    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
30441}
30442
30443/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30444///
30445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
30446#[inline]
30447#[target_feature(enable = "avx512f")]
30448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30449#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30450pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30451    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
30452}
30453
30454/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30455///
30456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
30457#[inline]
30458#[target_feature(enable = "avx512f")]
30459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30460#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30461pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30462    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
30463}
30464
30465/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30466///
30467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
30468#[inline]
30469#[target_feature(enable = "avx512f")]
30470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30471#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30472pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30473    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
30474}
30475
30476/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30477///
30478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
30479#[inline]
30480#[target_feature(enable = "avx512f")]
30481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30482#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30483pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30484    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
30485}
30486
30487/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30488///
30489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
30490#[inline]
30491#[target_feature(enable = "avx512f")]
30492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30493#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30494pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30495    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
30496}
30497
30498/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30499///
30500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
30501#[inline]
30502#[target_feature(enable = "avx512f")]
30503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30504#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30505pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30506    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
30507}
30508
30509/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30510///
30511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
30512#[inline]
30513#[target_feature(enable = "avx512f")]
30514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30515#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30516pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30517    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
30518}
30519
30520/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30521///
30522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
30523#[inline]
30524#[target_feature(enable = "avx512f")]
30525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30526#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30527pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30528    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
30529}
30530
30531/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30532///
30533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
30534#[inline]
30535#[target_feature(enable = "avx512f")]
30536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30537#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30538pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30539    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
30540}
30541
30542/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30543///
30544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
30545#[inline]
30546#[target_feature(enable = "avx512f")]
30547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30548#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30549pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30550    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
30551}
30552
30553/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30554///
30555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
30556#[inline]
30557#[target_feature(enable = "avx512f")]
30558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30559#[rustc_legacy_const_generics(2)]
30560#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30561pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
30562    unsafe {
30563        static_assert_uimm_bits!(IMM8, 5);
30564        let neg_one = -1;
30565        let a = a.as_f64x8();
30566        let b = b.as_f64x8();
30567        let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30568        r.cast_unsigned()
30569    }
30570}
30571
30572/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30573///
30574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
30575#[inline]
30576#[target_feature(enable = "avx512f")]
30577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30578#[rustc_legacy_const_generics(3)]
30579#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30580pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30581    unsafe {
30582        static_assert_uimm_bits!(IMM8, 5);
30583        let a = a.as_f64x8();
30584        let b = b.as_f64x8();
30585        let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30586        r.cast_unsigned()
30587    }
30588}
30589
30590/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30591///
30592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30593#[inline]
30594#[target_feature(enable = "avx512f,avx512vl")]
30595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30596#[rustc_legacy_const_generics(2)]
30597#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30598pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30599    unsafe {
30600        static_assert_uimm_bits!(IMM8, 5);
30601        let neg_one = -1;
30602        let a = a.as_f64x4();
30603        let b = b.as_f64x4();
30604        let r = vcmppd256(a, b, IMM8, neg_one);
30605        r.cast_unsigned()
30606    }
30607}
30608
30609/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30610///
30611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30612#[inline]
30613#[target_feature(enable = "avx512f,avx512vl")]
30614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30615#[rustc_legacy_const_generics(3)]
30616#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30617pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30618    unsafe {
30619        static_assert_uimm_bits!(IMM8, 5);
30620        let a = a.as_f64x4();
30621        let b = b.as_f64x4();
30622        let r = vcmppd256(a, b, IMM8, k1 as i8);
30623        r.cast_unsigned()
30624    }
30625}
30626
30627/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30628///
30629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30630#[inline]
30631#[target_feature(enable = "avx512f,avx512vl")]
30632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30633#[rustc_legacy_const_generics(2)]
30634#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30635pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30636    unsafe {
30637        static_assert_uimm_bits!(IMM8, 5);
30638        let neg_one = -1;
30639        let a = a.as_f64x2();
30640        let b = b.as_f64x2();
30641        let r = vcmppd128(a, b, IMM8, neg_one);
30642        r.cast_unsigned()
30643    }
30644}
30645
30646/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30647///
30648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30649#[inline]
30650#[target_feature(enable = "avx512f,avx512vl")]
30651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30652#[rustc_legacy_const_generics(3)]
30653#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30654pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30655    unsafe {
30656        static_assert_uimm_bits!(IMM8, 5);
30657        let a = a.as_f64x2();
30658        let b = b.as_f64x2();
30659        let r = vcmppd128(a, b, IMM8, k1 as i8);
30660        r.cast_unsigned()
30661    }
30662}
30663
30664/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30665/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30666///
30667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
30668#[inline]
30669#[target_feature(enable = "avx512f")]
30670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30671#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30672#[rustc_legacy_const_generics(2, 3)]
30673pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30674    a: __m512d,
30675    b: __m512d,
30676) -> __mmask8 {
30677    unsafe {
30678        static_assert_uimm_bits!(IMM5, 5);
30679        static_assert_mantissas_sae!(SAE);
30680        let neg_one = -1;
30681        let a = a.as_f64x8();
30682        let b = b.as_f64x8();
30683        let r = vcmppd(a, b, IMM5, neg_one, SAE);
30684        r.cast_unsigned()
30685    }
30686}
30687
30688/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30689/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30690///
30691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30692#[inline]
30693#[target_feature(enable = "avx512f")]
30694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30695#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30696#[rustc_legacy_const_generics(3, 4)]
30697pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30698    k1: __mmask8,
30699    a: __m512d,
30700    b: __m512d,
30701) -> __mmask8 {
30702    unsafe {
30703        static_assert_uimm_bits!(IMM5, 5);
30704        static_assert_mantissas_sae!(SAE);
30705        let a = a.as_f64x8();
30706        let b = b.as_f64x8();
30707        let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
30708        r.cast_unsigned()
30709    }
30710}
30711
30712/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30713///
30714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
30715#[inline]
30716#[target_feature(enable = "avx512f")]
30717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30718#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30719pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30720    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
30721}
30722
30723/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30724///
30725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
30726#[inline]
30727#[target_feature(enable = "avx512f")]
30728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30729#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30730pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30731    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
30732}
30733
30734/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30735///
30736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
30737#[inline]
30738#[target_feature(enable = "avx512f")]
30739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30740#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30741pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30742    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
30743}
30744
30745/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30746///
30747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
30748#[inline]
30749#[target_feature(enable = "avx512f")]
30750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30751#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30752pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30753    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
30754}
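
// Hedged usage sketch (not in the original source): the ordered/unordered compares are
// complementary NaN tests; a lane is "ordered" exactly when neither input is NaN.
// Name and values below are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_cmpord_cmpunord_pd_mask() {
    let a = _mm512_setr_pd(1.0, f64::NAN, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
    let b = _mm512_set1_pd(0.0);
    let ord = _mm512_cmpord_pd_mask(a, b);
    let unord = _mm512_cmpunord_pd_mask(a, b);
    // Lane 1 holds a NaN, so it is the only unordered lane; the two masks are complements.
    assert_eq!(unord, 0b0000_0010);
    assert_eq!(ord, !unord);
}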
30755
30756/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30757///
30758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
30759#[inline]
30760#[target_feature(enable = "avx512f")]
30761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30762#[rustc_legacy_const_generics(2)]
30763#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30764pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30765    unsafe {
30766        static_assert_uimm_bits!(IMM8, 5);
30767        let neg_one = -1;
30768        let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30769        r.cast_unsigned()
30770    }
30771}
30772
30773/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30774///
30775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
30776#[inline]
30777#[target_feature(enable = "avx512f")]
30778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30779#[rustc_legacy_const_generics(3)]
30780#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30781pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30782    unsafe {
30783        static_assert_uimm_bits!(IMM8, 5);
30784        let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30785        r.cast_unsigned()
30786    }
30787}
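
// Hedged usage sketch (not part of the original source): the scalar `ss` form only ever
// produces bit 0 of the mask, and the masked variant forces the result to zero when
// mask bit 0 is clear. Hypothetical name and values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_cmp_ss_mask() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    // Lower element: 1.0 < 2.0, so bit 0 is set.
    assert_eq!(_mm_cmp_ss_mask::<_CMP_LT_OQ>(a, b), 1);
    // With mask bit 0 cleared, the comparison result is discarded.
    assert_eq!(_mm_mask_cmp_ss_mask::<_CMP_LT_OQ>(0, a, b), 0);
}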
30788
30789/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30790/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30791///
30792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
30793#[inline]
30794#[target_feature(enable = "avx512f")]
30795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30796#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30797#[rustc_legacy_const_generics(2, 3)]
30798pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
30799    unsafe {
30800        static_assert_uimm_bits!(IMM5, 5);
30801        static_assert_mantissas_sae!(SAE);
30802        let neg_one = -1;
30803        let r = vcmpss(a, b, IMM5, neg_one, SAE);
30804        r.cast_unsigned()
30805    }
30806}
30807
30808/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30809/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30810///
30811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
30812#[inline]
30813#[target_feature(enable = "avx512f")]
30814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30815#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30816#[rustc_legacy_const_generics(3, 4)]
30817pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
30818    k1: __mmask8,
30819    a: __m128,
30820    b: __m128,
30821) -> __mmask8 {
30822    unsafe {
30823        static_assert_uimm_bits!(IMM5, 5);
30824        static_assert_mantissas_sae!(SAE);
30825        let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
30826        r.cast_unsigned()
30827    }
30828}
30829
30830/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30831///
30832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
30833#[inline]
30834#[target_feature(enable = "avx512f")]
30835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30836#[rustc_legacy_const_generics(2)]
30837#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30838pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30839    unsafe {
30840        static_assert_uimm_bits!(IMM8, 5);
30841        let neg_one = -1;
30842        let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30843        r.cast_unsigned()
30844    }
30845}
30846
30847/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30848///
30849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
30850#[inline]
30851#[target_feature(enable = "avx512f")]
30852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30853#[rustc_legacy_const_generics(3)]
30854#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30855pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30856    unsafe {
30857        static_assert_uimm_bits!(IMM8, 5);
30858        let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30859        r.cast_unsigned()
30860    }
30861}
30862
30863/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30864/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30865///
30866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
30867#[inline]
30868#[target_feature(enable = "avx512f")]
30869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30870#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30871#[rustc_legacy_const_generics(2, 3)]
30872pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30873    unsafe {
30874        static_assert_uimm_bits!(IMM5, 5);
30875        static_assert_mantissas_sae!(SAE);
30876        let neg_one = -1;
30877        let r = vcmpsd(a, b, IMM5, neg_one, SAE);
30878        r.cast_unsigned()
30879    }
30880}
30881
30882/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30883/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30884///
30885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
30886#[inline]
30887#[target_feature(enable = "avx512f")]
30888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30889#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30890#[rustc_legacy_const_generics(3, 4)]
30891pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
30892    k1: __mmask8,
30893    a: __m128d,
30894    b: __m128d,
30895) -> __mmask8 {
30896    unsafe {
30897        static_assert_uimm_bits!(IMM5, 5);
30898        static_assert_mantissas_sae!(SAE);
30899        let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
30900        r.cast_unsigned()
30901    }
30902}
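
// Hedged usage sketch (not in the original source): passing _MM_FROUND_NO_EXC keeps a
// compare against NaN from raising an invalid-operation exception, while the unordered
// result still shows up in bit 0. Hypothetical name and values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_cmp_round_sd_mask() {
    let a = _mm_set_sd(f64::NAN);
    let b = _mm_set_sd(1.0);
    // Quiet "unordered" predicate: true because one operand is NaN.
    let m = _mm_cmp_round_sd_mask::<_CMP_UNORD_Q, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(m, 1);
}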
30903
30904/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30905///
30906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
30907#[inline]
30908#[target_feature(enable = "avx512f")]
30909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30910#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30911pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30912    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
30913}
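
// Hedged usage sketch (not part of the original source): the `epu32` compares treat each
// lane as unsigned, so a lane holding -1 (0xFFFF_FFFF) is the largest possible value
// rather than the smallest. Hypothetical name and values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_cmplt_epu32_mask() {
    let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF when viewed as u32
    let b = _mm512_set1_epi32(1);
    // Unsigned: 0xFFFF_FFFF < 1 is false in every lane.
    assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0);
    // Signed (defined later in this file): -1 < 1 is true in every lane.
    assert_eq!(_mm512_cmplt_epi32_mask(a, b), 0xFFFF);
}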
30914
30915/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30916///
30917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
30918#[inline]
30919#[target_feature(enable = "avx512f")]
30920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30921#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30922pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30923    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30924}
30925
30926/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30927///
30928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
30929#[inline]
30930#[target_feature(enable = "avx512f,avx512vl")]
30931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30932#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30933pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30934    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
30935}
30936
30937/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30938///
30939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
30940#[inline]
30941#[target_feature(enable = "avx512f,avx512vl")]
30942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30943#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30944pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30945    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30946}
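
// Hedged usage sketch (not in the original source): the 256-bit and 128-bit forms also
// need AVX-512VL, so a caller must enable (or detect) both features before using them.
// Hypothetical name and values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _sketch_cmplt_epu32_mask_vl() {
    let a = _mm256_set1_epi32(0);
    let b = _mm256_set1_epi32(5);
    // 0 < 5 holds in all eight lanes.
    assert_eq!(_mm256_cmplt_epu32_mask(a, b), 0xFF);
    // The masked form drops lanes whose k1 bit is clear.
    assert_eq!(_mm256_mask_cmplt_epu32_mask(0b0101_0101, a, b), 0b0101_0101);
}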
30947
30948/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30949///
30950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
30951#[inline]
30952#[target_feature(enable = "avx512f,avx512vl")]
30953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30954#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30955pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30956    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
30957}
30958
30959/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30960///
30961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
30962#[inline]
30963#[target_feature(enable = "avx512f,avx512vl")]
30964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30965#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30966pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30967    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30968}
30969
30970/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30971///
30972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
30973#[inline]
30974#[target_feature(enable = "avx512f")]
30975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30976#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30977pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30978    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
30979}
30980
30981/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30982///
30983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
30984#[inline]
30985#[target_feature(enable = "avx512f")]
30986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30987#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30988pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30989    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30990}
30991
30992/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30993///
30994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
30995#[inline]
30996#[target_feature(enable = "avx512f,avx512vl")]
30997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30998#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30999pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31000    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
31001}
31002
31003/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31004///
31005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
31006#[inline]
31007#[target_feature(enable = "avx512f,avx512vl")]
31008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31009#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31010pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31011    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31012}
31013
31014/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31015///
31016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
31017#[inline]
31018#[target_feature(enable = "avx512f,avx512vl")]
31019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31020#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31021pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31022    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
31023}
31024
31025/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31026///
31027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
31028#[inline]
31029#[target_feature(enable = "avx512f,avx512vl")]
31030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31031#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31032pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31033    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31034}
31035
31036/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31037///
31038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
31039#[inline]
31040#[target_feature(enable = "avx512f")]
31041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31042#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31043pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31044    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
31045}
31046
31047/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31048///
31049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
31050#[inline]
31051#[target_feature(enable = "avx512f")]
31052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31053#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31054pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31055    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31056}
31057
31058/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31059///
31060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
31061#[inline]
31062#[target_feature(enable = "avx512f,avx512vl")]
31063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31064#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31065pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31066    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
31067}
31068
31069/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31070///
31071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
31072#[inline]
31073#[target_feature(enable = "avx512f,avx512vl")]
31074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31075#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31076pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31077    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31078}
31079
31080/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31081///
31082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
31083#[inline]
31084#[target_feature(enable = "avx512f,avx512vl")]
31085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31086#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31087pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31088    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
31089}
31090
31091/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31092///
31093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
31094#[inline]
31095#[target_feature(enable = "avx512f,avx512vl")]
31096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31097#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31098pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31099    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31100}
31101
31102/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31103///
31104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
31105#[inline]
31106#[target_feature(enable = "avx512f")]
31107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31108#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31109pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31110    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
31111}
31112
31113/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31114///
31115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
31116#[inline]
31117#[target_feature(enable = "avx512f")]
31118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31119#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31120pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31121    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31122}
31123
31124/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31125///
31126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
31127#[inline]
31128#[target_feature(enable = "avx512f,avx512vl")]
31129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31130#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31131pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31132    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
31133}
31134
31135/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31136///
31137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
31138#[inline]
31139#[target_feature(enable = "avx512f,avx512vl")]
31140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31141#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31142pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31143    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31144}
31145
31146/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31147///
31148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
31149#[inline]
31150#[target_feature(enable = "avx512f,avx512vl")]
31151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31152#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31153pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31154    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
31155}
31156
31157/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31158///
31159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
31160#[inline]
31161#[target_feature(enable = "avx512f,avx512vl")]
31162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31163#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31164pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31165    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31166}
31167
31168/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31169///
31170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
31171#[inline]
31172#[target_feature(enable = "avx512f")]
31173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31174#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31175pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31176    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
31177}
31178
31179/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31180///
31181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
31182#[inline]
31183#[target_feature(enable = "avx512f")]
31184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31185#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31186pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31187    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31188}
31189
31190/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31191///
31192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
31193#[inline]
31194#[target_feature(enable = "avx512f,avx512vl")]
31195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31196#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31197pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31198    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
31199}
31200
31201/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31202///
31203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
31204#[inline]
31205#[target_feature(enable = "avx512f,avx512vl")]
31206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31207#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31208pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31209    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31210}
31211
31212/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31213///
31214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
31215#[inline]
31216#[target_feature(enable = "avx512f,avx512vl")]
31217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31218#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31219pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31220    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
31221}
31222
31223/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31224///
31225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
31226#[inline]
31227#[target_feature(enable = "avx512f,avx512vl")]
31228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31229#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31230pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31231    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31232}
31233
31234/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31235///
31236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
31237#[inline]
31238#[target_feature(enable = "avx512f")]
31239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31240#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31241pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31242    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
31243}
31244
31245/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31246///
31247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
31248#[inline]
31249#[target_feature(enable = "avx512f")]
31250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31251#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31252pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31253    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31254}
31255
31256/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31257///
31258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
31259#[inline]
31260#[target_feature(enable = "avx512f,avx512vl")]
31261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31262#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31263pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31264    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
31265}
31266
31267/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31268///
31269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
31270#[inline]
31271#[target_feature(enable = "avx512f,avx512vl")]
31272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31273#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31274pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31275    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31276}
31277
31278/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31279///
31280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
31281#[inline]
31282#[target_feature(enable = "avx512f,avx512vl")]
31283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31284#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31285pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31286    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
31287}
31288
31289/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31290///
31291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
31292#[inline]
31293#[target_feature(enable = "avx512f,avx512vl")]
31294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31295#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31296pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31297    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31298}
31299
31300/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31301///
31302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
31303#[inline]
31304#[target_feature(enable = "avx512f")]
31305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31306#[rustc_legacy_const_generics(2)]
31307#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31308pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31309    unsafe {
31310        static_assert_uimm_bits!(IMM3, 3);
31311        let a = a.as_u32x16();
31312        let b = b.as_u32x16();
31313        let r = match IMM3 {
31314            0 => simd_eq(a, b),
31315            1 => simd_lt(a, b),
31316            2 => simd_le(a, b),
31317            3 => i32x16::ZERO,
31318            4 => simd_ne(a, b),
31319            5 => simd_ge(a, b),
31320            6 => simd_gt(a, b),
31321            _ => i32x16::splat(-1),
31322        };
31323        simd_bitmask(r)
31324    }
31325}
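
// Hedged usage sketch (not part of the original source): the IMM3 predicate selects one
// of the eight _MM_CMPINT_* comparisons; the FALSE and TRUE predicates ignore the
// operands entirely, as the match above shows. Hypothetical name and values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_cmp_epu32_mask_predicates() {
    let a = _mm512_set1_epi32(3);
    let b = _mm512_set1_epi32(7);
    assert_eq!(_mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b), 0xFFFF);
    assert_eq!(_mm512_cmp_epu32_mask::<_MM_CMPINT_NLE>(a, b), 0);
    assert_eq!(_mm512_cmp_epu32_mask::<_MM_CMPINT_FALSE>(a, b), 0);
    assert_eq!(_mm512_cmp_epu32_mask::<_MM_CMPINT_TRUE>(a, b), 0xFFFF);
}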
31326
31327/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31328///
31329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
31330#[inline]
31331#[target_feature(enable = "avx512f")]
31332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31333#[rustc_legacy_const_generics(3)]
31334#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31335pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31336    k1: __mmask16,
31337    a: __m512i,
31338    b: __m512i,
31339) -> __mmask16 {
31340    unsafe {
31341        static_assert_uimm_bits!(IMM3, 3);
31342        let a = a.as_u32x16();
31343        let b = b.as_u32x16();
31344        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31345        let r = match IMM3 {
31346            0 => simd_and(k1, simd_eq(a, b)),
31347            1 => simd_and(k1, simd_lt(a, b)),
31348            2 => simd_and(k1, simd_le(a, b)),
31349            3 => i32x16::ZERO,
31350            4 => simd_and(k1, simd_ne(a, b)),
31351            5 => simd_and(k1, simd_ge(a, b)),
31352            6 => simd_and(k1, simd_gt(a, b)),
31353            _ => k1,
31354        };
31355        simd_bitmask(r)
31356    }
31357}
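
// Hedged usage sketch (not in the original source): as the implementation above shows,
// the masked variant is simply the unmasked compare ANDed with k1. Hypothetical name
// and values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_mask_cmp_epu32_mask() {
    let a = _mm512_set1_epi32(3);
    let b = _mm512_set1_epi32(7);
    let k1: __mmask16 = 0b1010_1010_1010_1010;
    let full = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
    let masked = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b);
    assert_eq!(masked, full & k1);
}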
31358
31359/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31360///
31361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
31362#[inline]
31363#[target_feature(enable = "avx512f,avx512vl")]
31364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31365#[rustc_legacy_const_generics(2)]
31366#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31367pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31368    unsafe {
31369        static_assert_uimm_bits!(IMM3, 3);
31370        let a = a.as_u32x8();
31371        let b = b.as_u32x8();
31372        let r = match IMM3 {
31373            0 => simd_eq(a, b),
31374            1 => simd_lt(a, b),
31375            2 => simd_le(a, b),
31376            3 => i32x8::ZERO,
31377            4 => simd_ne(a, b),
31378            5 => simd_ge(a, b),
31379            6 => simd_gt(a, b),
31380            _ => i32x8::splat(-1),
31381        };
31382        simd_bitmask(r)
31383    }
31384}
31385
31386/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31387///
31388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
31389#[inline]
31390#[target_feature(enable = "avx512f,avx512vl")]
31391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31392#[rustc_legacy_const_generics(3)]
31393#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31394pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31395    k1: __mmask8,
31396    a: __m256i,
31397    b: __m256i,
31398) -> __mmask8 {
31399    unsafe {
31400        static_assert_uimm_bits!(IMM3, 3);
31401        let a = a.as_u32x8();
31402        let b = b.as_u32x8();
31403        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31404        let r = match IMM3 {
31405            0 => simd_and(k1, simd_eq(a, b)),
31406            1 => simd_and(k1, simd_lt(a, b)),
31407            2 => simd_and(k1, simd_le(a, b)),
31408            3 => i32x8::ZERO,
31409            4 => simd_and(k1, simd_ne(a, b)),
31410            5 => simd_and(k1, simd_ge(a, b)),
31411            6 => simd_and(k1, simd_gt(a, b)),
31412            _ => k1,
31413        };
31414        simd_bitmask(r)
31415    }
31416}
31417
31418/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31419///
31420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
31421#[inline]
31422#[target_feature(enable = "avx512f,avx512vl")]
31423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31424#[rustc_legacy_const_generics(2)]
31425#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31426pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31427    unsafe {
31428        static_assert_uimm_bits!(IMM3, 3);
31429        let a = a.as_u32x4();
31430        let b = b.as_u32x4();
31431        let r = match IMM3 {
31432            0 => simd_eq(a, b),
31433            1 => simd_lt(a, b),
31434            2 => simd_le(a, b),
31435            3 => i32x4::ZERO,
31436            4 => simd_ne(a, b),
31437            5 => simd_ge(a, b),
31438            6 => simd_gt(a, b),
31439            _ => i32x4::splat(-1),
31440        };
31441        simd_bitmask(r)
31442    }
31443}
31444
31445/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31446///
31447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
31448#[inline]
31449#[target_feature(enable = "avx512f,avx512vl")]
31450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31451#[rustc_legacy_const_generics(3)]
31452#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31453pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31454    k1: __mmask8,
31455    a: __m128i,
31456    b: __m128i,
31457) -> __mmask8 {
31458    unsafe {
31459        static_assert_uimm_bits!(IMM3, 3);
31460        let a = a.as_u32x4();
31461        let b = b.as_u32x4();
31462        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
31463        let r = match IMM3 {
31464            0 => simd_and(k1, simd_eq(a, b)),
31465            1 => simd_and(k1, simd_lt(a, b)),
31466            2 => simd_and(k1, simd_le(a, b)),
31467            3 => i32x4::ZERO,
31468            4 => simd_and(k1, simd_ne(a, b)),
31469            5 => simd_and(k1, simd_ge(a, b)),
31470            6 => simd_and(k1, simd_gt(a, b)),
31471            _ => k1,
31472        };
31473        simd_bitmask(r)
31474    }
31475}
31476
31477/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31478///
31479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
31480#[inline]
31481#[target_feature(enable = "avx512f")]
31482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31483#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31484pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31485    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
31486}
31487
31488/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31489///
31490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
31491#[inline]
31492#[target_feature(enable = "avx512f")]
31493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31494#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31495pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31496    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31497}
31498
31499/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31500///
31501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
31502#[inline]
31503#[target_feature(enable = "avx512f,avx512vl")]
31504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31505#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31506pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31507    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
31508}
31509
31510/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31511///
31512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
31513#[inline]
31514#[target_feature(enable = "avx512f,avx512vl")]
31515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31516#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31517pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31518    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31519}
31520
31521/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31522///
31523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
31524#[inline]
31525#[target_feature(enable = "avx512f,avx512vl")]
31526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31527#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31528pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31529    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
31530}
31531
31532/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31533///
31534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
31535#[inline]
31536#[target_feature(enable = "avx512f,avx512vl")]
31537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31538#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31539pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31540    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31541}
31542
31543/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31544///
31545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
31546#[inline]
31547#[target_feature(enable = "avx512f")]
31548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31549#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31550pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31551    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
31552}
31553
31554/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31555///
31556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
31557#[inline]
31558#[target_feature(enable = "avx512f")]
31559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31561pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31562    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31563}
31564
31565/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31566///
31567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
31568#[inline]
31569#[target_feature(enable = "avx512f,avx512vl")]
31570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31571#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31572pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31573    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
31574}
31575
31576/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31577///
31578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
31579#[inline]
31580#[target_feature(enable = "avx512f,avx512vl")]
31581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31582#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31583pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31584    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31585}
31586
31587/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31588///
31589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
31590#[inline]
31591#[target_feature(enable = "avx512f,avx512vl")]
31592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31593#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31594pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31595    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
31596}
31597
31598/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31599///
31600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
31601#[inline]
31602#[target_feature(enable = "avx512f,avx512vl")]
31603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31604#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31605pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31606    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31607}
31608
31609/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31610///
31611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
31612#[inline]
31613#[target_feature(enable = "avx512f")]
31614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31615#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31616pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31617    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
31618}
31619
31620/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31621///
31622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
31623#[inline]
31624#[target_feature(enable = "avx512f")]
31625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31626#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31627pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31628    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31629}
31630
31631/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31632///
31633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
31634#[inline]
31635#[target_feature(enable = "avx512f,avx512vl")]
31636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31637#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31638pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31639    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
31640}
31641
31642/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31643///
31644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
31645#[inline]
31646#[target_feature(enable = "avx512f,avx512vl")]
31647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31648#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31649pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31650    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31651}
31652
31653/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31654///
31655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
31656#[inline]
31657#[target_feature(enable = "avx512f,avx512vl")]
31658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31659#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31660pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31661    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
31662}
31663
31664/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31665///
31666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
31667#[inline]
31668#[target_feature(enable = "avx512f,avx512vl")]
31669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31670#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31671pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31672    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31673}
31674
31675/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31676///
31677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
31678#[inline]
31679#[target_feature(enable = "avx512f")]
31680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31681#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31682pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31683    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
31684}
31685
31686/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31687///
31688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
31689#[inline]
31690#[target_feature(enable = "avx512f")]
31691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31692#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31693pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31694    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31695}
31696
31697/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31698///
31699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
31700#[inline]
31701#[target_feature(enable = "avx512f,avx512vl")]
31702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31703#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31704pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31705    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
31706}
31707
31708/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31709///
31710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
31711#[inline]
31712#[target_feature(enable = "avx512f,avx512vl")]
31713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31714#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31715pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31716    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31717}
31718
31719/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31720///
31721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
31722#[inline]
31723#[target_feature(enable = "avx512f,avx512vl")]
31724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31725#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31726pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31727    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
31728}
31729
31730/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31731///
31732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
31733#[inline]
31734#[target_feature(enable = "avx512f,avx512vl")]
31735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31736#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31737pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31738    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31739}
31740
31741/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31742///
31743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
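///
/// A minimal usage sketch (assumes `avx512f` has been detected at runtime):
///
/// ```ignore
/// let a = _mm512_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4);
/// let b = _mm512_set1_epi32(2);
/// // Lanes 1, 5, 9 and 13 compare equal, so those mask bits are set.
/// assert_eq!(_mm512_cmpeq_epi32_mask(a, b), 0b0010_0010_0010_0010);
/// ```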
31744#[inline]
31745#[target_feature(enable = "avx512f")]
31746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31747#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31748pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31749    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
31750}
31751
31752/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31753///
31754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
31755#[inline]
31756#[target_feature(enable = "avx512f")]
31757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31758#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31759pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31760    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31761}
31762
31763/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31764///
31765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
31766#[inline]
31767#[target_feature(enable = "avx512f,avx512vl")]
31768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31769#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31770pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31771    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
31772}
31773
31774/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31775///
31776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
31777#[inline]
31778#[target_feature(enable = "avx512f,avx512vl")]
31779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31780#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31781pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31782    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31783}
31784
31785/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31786///
31787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
31788#[inline]
31789#[target_feature(enable = "avx512f,avx512vl")]
31790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31791#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31792pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31793    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
31794}
31795
31796/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31797///
31798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
31799#[inline]
31800#[target_feature(enable = "avx512f,avx512vl")]
31801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31802#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31803pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31804    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31805}
31806
31807/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31808///
31809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
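///
/// A minimal usage sketch (assumes `avx512f` has been detected at runtime); the
/// not-equal mask is the bitwise complement of the equality mask:
///
/// ```ignore
/// let a = _mm512_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4);
/// let b = _mm512_set1_epi32(2);
/// let eq = _mm512_cmpeq_epi32_mask(a, b);
/// let ne = _mm512_cmpneq_epi32_mask(a, b);
/// assert_eq!(ne, !eq);
/// assert_eq!(ne, 0b1101_1101_1101_1101);
/// ```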
31810#[inline]
31811#[target_feature(enable = "avx512f")]
31812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31813#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31814pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31815    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
31816}
31817
31818/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31819///
31820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
31821#[inline]
31822#[target_feature(enable = "avx512f")]
31823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31824#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31825pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31826    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31827}
31828
31829/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31830///
31831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
31832#[inline]
31833#[target_feature(enable = "avx512f,avx512vl")]
31834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31835#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31836pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31837    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
31838}
31839
31840/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31841///
31842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
31843#[inline]
31844#[target_feature(enable = "avx512f,avx512vl")]
31845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31846#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31847pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31848    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31849}
31850
31851/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31852///
31853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
31854#[inline]
31855#[target_feature(enable = "avx512f,avx512vl")]
31856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31857#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31858pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31859    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
31860}
31861
31862/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31863///
31864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
31865#[inline]
31866#[target_feature(enable = "avx512f,avx512vl")]
31867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31868#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31869pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31870    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31871}
31872
31873/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31874///
31875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
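///
/// A minimal usage sketch (assumes `avx512f` has been detected at runtime). `IMM3`
/// selects the predicate: 0 = EQ, 1 = LT, 2 = LE, 3 = always false, 4 = NE,
/// 5 = NLT (>=), 6 = NLE (>), 7 = always true:
///
/// ```ignore
/// let a = _mm512_set1_epi32(1);
/// let b = _mm512_set1_epi32(2);
/// assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b), 0xFFFF);
/// assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_EQ>(a, b), 0x0000);
/// assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_NLT>(a, b), 0x0000);
/// assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_NE>(a, b), 0xFFFF);
/// ```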
31876#[inline]
31877#[target_feature(enable = "avx512f")]
31878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31879#[rustc_legacy_const_generics(2)]
31880#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31881pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31882    unsafe {
31883        static_assert_uimm_bits!(IMM3, 3);
31884        let a = a.as_i32x16();
31885        let b = b.as_i32x16();
31886        let r = match IMM3 {
31887            0 => simd_eq(a, b),
31888            1 => simd_lt(a, b),
31889            2 => simd_le(a, b),
31890            3 => i32x16::ZERO,
31891            4 => simd_ne(a, b),
31892            5 => simd_ge(a, b),
31893            6 => simd_gt(a, b),
31894            _ => i32x16::splat(-1),
31895        };
31896        simd_bitmask(r)
31897    }
31898}
31899
31900/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31901///
31902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
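///
/// A minimal usage sketch (assumes `avx512f` has been detected at runtime); the
/// masked form is equivalent to ANDing `k1` with the unmasked comparison result:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(4);
/// let k1: __mmask16 = 0x00FF; // only consider the low 8 lanes
/// let masked = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b);
/// let full = _mm512_cmp_epi32_mask::<_MM_CMPINT_LE>(a, b);
/// assert_eq!(masked, full & k1);
/// assert_eq!(masked, 0b0000_0000_0001_1111);
/// ```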
31903#[inline]
31904#[target_feature(enable = "avx512f")]
31905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31906#[rustc_legacy_const_generics(3)]
31907#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31908pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31909    k1: __mmask16,
31910    a: __m512i,
31911    b: __m512i,
31912) -> __mmask16 {
31913    unsafe {
31914        static_assert_uimm_bits!(IMM3, 3);
31915        let a = a.as_i32x16();
31916        let b = b.as_i32x16();
31917        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31918        let r = match IMM3 {
31919            0 => simd_and(k1, simd_eq(a, b)),
31920            1 => simd_and(k1, simd_lt(a, b)),
31921            2 => simd_and(k1, simd_le(a, b)),
31922            3 => i32x16::ZERO,
31923            4 => simd_and(k1, simd_ne(a, b)),
31924            5 => simd_and(k1, simd_ge(a, b)),
31925            6 => simd_and(k1, simd_gt(a, b)),
31926            _ => k1,
31927        };
31928        simd_bitmask(r)
31929    }
31930}
31931
31932/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31933///
31934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
31935#[inline]
31936#[target_feature(enable = "avx512f,avx512vl")]
31937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31938#[rustc_legacy_const_generics(2)]
31939#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31940pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31941    unsafe {
31942        static_assert_uimm_bits!(IMM3, 3);
31943        let a = a.as_i32x8();
31944        let b = b.as_i32x8();
31945        let r = match IMM3 {
31946            0 => simd_eq(a, b),
31947            1 => simd_lt(a, b),
31948            2 => simd_le(a, b),
31949            3 => i32x8::ZERO,
31950            4 => simd_ne(a, b),
31951            5 => simd_ge(a, b),
31952            6 => simd_gt(a, b),
31953            _ => i32x8::splat(-1),
31954        };
31955        simd_bitmask(r)
31956    }
31957}
31958
31959/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31960///
31961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
31962#[inline]
31963#[target_feature(enable = "avx512f,avx512vl")]
31964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31965#[rustc_legacy_const_generics(3)]
31966#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31967pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31968    k1: __mmask8,
31969    a: __m256i,
31970    b: __m256i,
31971) -> __mmask8 {
31972    unsafe {
31973        static_assert_uimm_bits!(IMM3, 3);
31974        let a = a.as_i32x8();
31975        let b = b.as_i32x8();
31976        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31977        let r = match IMM3 {
31978            0 => simd_and(k1, simd_eq(a, b)),
31979            1 => simd_and(k1, simd_lt(a, b)),
31980            2 => simd_and(k1, simd_le(a, b)),
31981            3 => i32x8::ZERO,
31982            4 => simd_and(k1, simd_ne(a, b)),
31983            5 => simd_and(k1, simd_ge(a, b)),
31984            6 => simd_and(k1, simd_gt(a, b)),
31985            _ => k1,
31986        };
31987        simd_bitmask(r)
31988    }
31989}
31990
31991/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31992///
31993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
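///
/// A minimal usage sketch (assumes both `avx512f` and `avx512vl` have been
/// detected at runtime); for 128-bit vectors only the low 4 bits of the
/// returned `__mmask8` are meaningful:
///
/// ```ignore
/// let a = _mm_setr_epi32(0, 1, 2, 3);
/// let b = _mm_set1_epi32(1);
/// // _MM_CMPINT_NLE is "not less-or-equal", i.e. greater-than: lanes 2 and 3.
/// assert_eq!(_mm_cmp_epi32_mask::<_MM_CMPINT_NLE>(a, b), 0b0000_1100);
/// ```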
31994#[inline]
31995#[target_feature(enable = "avx512f,avx512vl")]
31996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31997#[rustc_legacy_const_generics(2)]
31998#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31999pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32000    unsafe {
32001        static_assert_uimm_bits!(IMM3, 3);
32002        let a = a.as_i32x4();
32003        let b = b.as_i32x4();
32004        let r = match IMM3 {
32005            0 => simd_eq(a, b),
32006            1 => simd_lt(a, b),
32007            2 => simd_le(a, b),
32008            3 => i32x4::ZERO,
32009            4 => simd_ne(a, b),
32010            5 => simd_ge(a, b),
32011            6 => simd_gt(a, b),
32012            _ => i32x4::splat(-1),
32013        };
32014        simd_bitmask(r)
32015    }
32016}
32017
32018/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32019///
32020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
32021#[inline]
32022#[target_feature(enable = "avx512f,avx512vl")]
32023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32024#[rustc_legacy_const_generics(3)]
32025#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32026pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
32027    k1: __mmask8,
32028    a: __m128i,
32029    b: __m128i,
32030) -> __mmask8 {
32031    unsafe {
32032        static_assert_uimm_bits!(IMM3, 3);
32033        let a = a.as_i32x4();
32034        let b = b.as_i32x4();
32035        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
32036        let r = match IMM3 {
32037            0 => simd_and(k1, simd_eq(a, b)),
32038            1 => simd_and(k1, simd_lt(a, b)),
32039            2 => simd_and(k1, simd_le(a, b)),
32040            3 => i32x4::ZERO,
32041            4 => simd_and(k1, simd_ne(a, b)),
32042            5 => simd_and(k1, simd_ge(a, b)),
32043            6 => simd_and(k1, simd_gt(a, b)),
32044            _ => k1,
32045        };
32046        simd_bitmask(r)
32047    }
32048}
32049
32050/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32051///
32052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
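///
/// A minimal usage sketch (assumes `avx512f` has been detected at runtime); the
/// comparison is unsigned, so an all-ones lane behaves as `u64::MAX`:
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // u64::MAX when viewed as unsigned
/// let b = _mm512_set1_epi64(1);
/// assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0x00); // u64::MAX < 1 is false
/// assert_eq!(_mm512_cmplt_epu64_mask(b, a), 0xFF); // 1 < u64::MAX is true
/// ```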
32053#[inline]
32054#[target_feature(enable = "avx512f")]
32055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32056#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32057pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32058    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
32059}
32060
32061/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32062///
32063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
32064#[inline]
32065#[target_feature(enable = "avx512f")]
32066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32067#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32068pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32069    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32070}
32071
32072/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32073///
32074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
32075#[inline]
32076#[target_feature(enable = "avx512f,avx512vl")]
32077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32078#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32079pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32080    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
32081}
32082
32083/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32084///
32085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
32086#[inline]
32087#[target_feature(enable = "avx512f,avx512vl")]
32088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32089#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32090pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32091    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32092}
32093
32094/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32095///
32096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
32097#[inline]
32098#[target_feature(enable = "avx512f,avx512vl")]
32099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32100#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32101pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32102    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
32103}
32104
32105/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32106///
32107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
32108#[inline]
32109#[target_feature(enable = "avx512f,avx512vl")]
32110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32111#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32112pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32113    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32114}
32115
32116/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32117///
32118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
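///
/// A minimal usage sketch (assumes `avx512f` has been detected at runtime),
/// contrasting the unsigned comparison with its signed counterpart:
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // u64::MAX when viewed as unsigned
/// let b = _mm512_set1_epi64(0);
/// assert_eq!(_mm512_cmpgt_epu64_mask(a, b), 0xFF); // u64::MAX > 0
/// assert_eq!(_mm512_cmpgt_epi64_mask(a, b), 0x00); // but -1 > 0 is false
/// ```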
32119#[inline]
32120#[target_feature(enable = "avx512f")]
32121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32122#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32123pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32124    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
32125}
32126
32127/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32128///
32129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
32130#[inline]
32131#[target_feature(enable = "avx512f")]
32132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32133#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32134pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32135    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32136}
32137
32138/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32139///
32140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
32141#[inline]
32142#[target_feature(enable = "avx512f,avx512vl")]
32143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32144#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32145pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32146    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
32147}
32148
32149/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32150///
32151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
32152#[inline]
32153#[target_feature(enable = "avx512f,avx512vl")]
32154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32155#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32156pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32157    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32158}
32159
32160/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32161///
32162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
32163#[inline]
32164#[target_feature(enable = "avx512f,avx512vl")]
32165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32166#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32167pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32168    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
32169}
32170
32171/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32172///
32173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
32174#[inline]
32175#[target_feature(enable = "avx512f,avx512vl")]
32176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32177#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32178pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32179    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32180}
32181
32182/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32183///
32184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
32185#[inline]
32186#[target_feature(enable = "avx512f")]
32187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32188#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32189pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32190    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
32191}
32192
32193/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32194///
32195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
32196#[inline]
32197#[target_feature(enable = "avx512f")]
32198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32199#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32200pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32201    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32202}
32203
32204/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32205///
32206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
32207#[inline]
32208#[target_feature(enable = "avx512f,avx512vl")]
32209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32210#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32211pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32212    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
32213}
32214
32215/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32216///
32217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
32218#[inline]
32219#[target_feature(enable = "avx512f,avx512vl")]
32220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32221#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32222pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32223    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32224}
32225
32226/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32227///
32228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
32229#[inline]
32230#[target_feature(enable = "avx512f,avx512vl")]
32231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32232#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32233pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32234    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
32235}
32236
32237/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32238///
32239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
32240#[inline]
32241#[target_feature(enable = "avx512f,avx512vl")]
32242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32243#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32244pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32245    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32246}
32247
32248/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32249///
32250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
32251#[inline]
32252#[target_feature(enable = "avx512f")]
32253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32254#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32255pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32256    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
32257}
32258
32259/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32260///
32261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
32262#[inline]
32263#[target_feature(enable = "avx512f")]
32264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32265#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32266pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32267    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32268}
32269
32270/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32271///
32272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
32273#[inline]
32274#[target_feature(enable = "avx512f,avx512vl")]
32275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32276#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32277pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32278    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
32279}
32280
32281/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32282///
32283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
32284#[inline]
32285#[target_feature(enable = "avx512f,avx512vl")]
32286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32287#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32288pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32289    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32290}
32291
32292/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32293///
32294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
32295#[inline]
32296#[target_feature(enable = "avx512f,avx512vl")]
32297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32298#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32299pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32300    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
32301}
32302
32303/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32304///
32305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
32306#[inline]
32307#[target_feature(enable = "avx512f,avx512vl")]
32308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32309#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32310pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32311    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32312}
32313
32314/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32315///
32316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
32317#[inline]
32318#[target_feature(enable = "avx512f")]
32319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32320#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32321pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32322    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
32323}
32324
32325/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32326///
32327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
32328#[inline]
32329#[target_feature(enable = "avx512f")]
32330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32331#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32332pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32333    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32334}
32335
32336/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32337///
32338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
32339#[inline]
32340#[target_feature(enable = "avx512f,avx512vl")]
32341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32342#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32343pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32344    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
32345}
32346
32347/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32348///
32349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
32350#[inline]
32351#[target_feature(enable = "avx512f,avx512vl")]
32352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32353#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32354pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32355    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32356}
32357
32358/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32359///
32360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
32361#[inline]
32362#[target_feature(enable = "avx512f,avx512vl")]
32363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32364#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32365pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32366    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
32367}
32368
32369/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32370///
32371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
32372#[inline]
32373#[target_feature(enable = "avx512f,avx512vl")]
32374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32375#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32376pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32377    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32378}
32379
32380/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32381///
32382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
32383#[inline]
32384#[target_feature(enable = "avx512f")]
32385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32386#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32387pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32388    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
32389}
32390
32391/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32392///
32393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
32394#[inline]
32395#[target_feature(enable = "avx512f")]
32396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32397#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32398pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32399    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32400}
32401
32402/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32403///
32404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
32405#[inline]
32406#[target_feature(enable = "avx512f,avx512vl")]
32407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32408#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32409pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32410    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
32411}
32412
32413/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32414///
32415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
32416#[inline]
32417#[target_feature(enable = "avx512f,avx512vl")]
32418#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32419#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32420pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32421    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32422}
32423
32424/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32425///
32426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
32427#[inline]
32428#[target_feature(enable = "avx512f,avx512vl")]
32429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32430#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32431pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32432    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
32433}
32434
32435/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32436///
32437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
32438#[inline]
32439#[target_feature(enable = "avx512f,avx512vl")]
32440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32441#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32442pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32443    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32444}
32445
32446/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32447///
32448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
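///
/// A minimal usage sketch (assumes `avx512f` has been detected at runtime); the
/// `IMM3` encoding matches the signed forms, but the ordering is unsigned:
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // u64::MAX when viewed as unsigned
/// let b = _mm512_set1_epi64(0);
/// assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_NLE>(a, b), 0xFF); // u64::MAX > 0
/// assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b), 0x00);
/// ```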
32449#[inline]
32450#[target_feature(enable = "avx512f")]
32451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32452#[rustc_legacy_const_generics(2)]
32453#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32454pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32455    unsafe {
32456        static_assert_uimm_bits!(IMM3, 3);
32457        let a = a.as_u64x8();
32458        let b = b.as_u64x8();
32459        let r = match IMM3 {
32460            0 => simd_eq(a, b),
32461            1 => simd_lt(a, b),
32462            2 => simd_le(a, b),
32463            3 => i64x8::ZERO,
32464            4 => simd_ne(a, b),
32465            5 => simd_ge(a, b),
32466            6 => simd_gt(a, b),
32467            _ => i64x8::splat(-1),
32468        };
32469        simd_bitmask(r)
32470    }
32471}
32472
32473/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32474///
32475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
32476#[inline]
32477#[target_feature(enable = "avx512f")]
32478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32479#[rustc_legacy_const_generics(3)]
32480#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32481pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32482    k1: __mmask8,
32483    a: __m512i,
32484    b: __m512i,
32485) -> __mmask8 {
32486    unsafe {
32487        static_assert_uimm_bits!(IMM3, 3);
32488        let a = a.as_u64x8();
32489        let b = b.as_u64x8();
32490        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
32491        let r = match IMM3 {
32492            0 => simd_and(k1, simd_eq(a, b)),
32493            1 => simd_and(k1, simd_lt(a, b)),
32494            2 => simd_and(k1, simd_le(a, b)),
32495            3 => i64x8::ZERO,
32496            4 => simd_and(k1, simd_ne(a, b)),
32497            5 => simd_and(k1, simd_ge(a, b)),
32498            6 => simd_and(k1, simd_gt(a, b)),
32499            _ => k1,
32500        };
32501        simd_bitmask(r)
32502    }
32503}
32504
32505/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32506///
32507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
32508#[inline]
32509#[target_feature(enable = "avx512f,avx512vl")]
32510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32511#[rustc_legacy_const_generics(2)]
32512#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32513pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
32514    unsafe {
32515        static_assert_uimm_bits!(IMM3, 3);
32516        let a = a.as_u64x4();
32517        let b = b.as_u64x4();
32518        let r = match IMM3 {
32519            0 => simd_eq(a, b),
32520            1 => simd_lt(a, b),
32521            2 => simd_le(a, b),
32522            3 => i64x4::ZERO,
32523            4 => simd_ne(a, b),
32524            5 => simd_ge(a, b),
32525            6 => simd_gt(a, b),
32526            _ => i64x4::splat(-1),
32527        };
32528        simd_bitmask(r)
32529    }
32530}
32531
32532/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32533///
32534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
32535#[inline]
32536#[target_feature(enable = "avx512f,avx512vl")]
32537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32538#[rustc_legacy_const_generics(3)]
32539#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32540pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32541    k1: __mmask8,
32542    a: __m256i,
32543    b: __m256i,
32544) -> __mmask8 {
32545    unsafe {
32546        static_assert_uimm_bits!(IMM3, 3);
32547        let a = a.as_u64x4();
32548        let b = b.as_u64x4();
32549        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
32550        let r = match IMM3 {
32551            0 => simd_and(k1, simd_eq(a, b)),
32552            1 => simd_and(k1, simd_lt(a, b)),
32553            2 => simd_and(k1, simd_le(a, b)),
32554            3 => i64x4::ZERO,
32555            4 => simd_and(k1, simd_ne(a, b)),
32556            5 => simd_and(k1, simd_ge(a, b)),
32557            6 => simd_and(k1, simd_gt(a, b)),
32558            _ => k1,
32559        };
32560        simd_bitmask(r)
32561    }
32562}
32563
32564/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32565///
32566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
32567#[inline]
32568#[target_feature(enable = "avx512f,avx512vl")]
32569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32570#[rustc_legacy_const_generics(2)]
32571#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32572pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32573    unsafe {
32574        static_assert_uimm_bits!(IMM3, 3);
32575        let a = a.as_u64x2();
32576        let b = b.as_u64x2();
32577        let r = match IMM3 {
32578            0 => simd_eq(a, b),
32579            1 => simd_lt(a, b),
32580            2 => simd_le(a, b),
32581            3 => i64x2::ZERO,
32582            4 => simd_ne(a, b),
32583            5 => simd_ge(a, b),
32584            6 => simd_gt(a, b),
32585            _ => i64x2::splat(-1),
32586        };
32587        simd_bitmask(r)
32588    }
32589}
32590
32591/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32592///
32593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
32594#[inline]
32595#[target_feature(enable = "avx512f,avx512vl")]
32596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32597#[rustc_legacy_const_generics(3)]
32598#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32599pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32600    k1: __mmask8,
32601    a: __m128i,
32602    b: __m128i,
32603) -> __mmask8 {
32604    unsafe {
32605        static_assert_uimm_bits!(IMM3, 3);
32606        let a = a.as_u64x2();
32607        let b = b.as_u64x2();
32608        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
32609        let r = match IMM3 {
32610            0 => simd_and(k1, simd_eq(a, b)),
32611            1 => simd_and(k1, simd_lt(a, b)),
32612            2 => simd_and(k1, simd_le(a, b)),
32613            3 => i64x2::ZERO,
32614            4 => simd_and(k1, simd_ne(a, b)),
32615            5 => simd_and(k1, simd_ge(a, b)),
32616            6 => simd_and(k1, simd_gt(a, b)),
32617            _ => k1,
32618        };
32619        simd_bitmask(r)
32620    }
32621}
32622
32623/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32624///
32625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
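///
/// A minimal usage sketch (assumes `avx512f` has been detected at runtime); unlike
/// the `epu64` variants above, the ordering here is signed:
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1);
/// let b = _mm512_set1_epi64(1);
/// assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0xFF); // -1 < 1 in every lane
/// ```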
32626#[inline]
32627#[target_feature(enable = "avx512f")]
32628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32629#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32630pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32631    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
32632}
32633
32634/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32635///
32636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
32637#[inline]
32638#[target_feature(enable = "avx512f")]
32639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32640#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32641pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32642    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32643}
32644
32645/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32646///
32647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
32648#[inline]
32649#[target_feature(enable = "avx512f,avx512vl")]
32650#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32651#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32652pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32653    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
32654}
32655
32656/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32657///
32658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
32659#[inline]
32660#[target_feature(enable = "avx512f,avx512vl")]
32661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32662#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32663pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32664    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32665}
32666
32667/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32668///
32669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
32670#[inline]
32671#[target_feature(enable = "avx512f,avx512vl")]
32672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32673#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32674pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32675    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
32676}
32677
32678/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32679///
32680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
32681#[inline]
32682#[target_feature(enable = "avx512f,avx512vl")]
32683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32684#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32685pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32686    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32687}
32688
32689/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32690///
32691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
32692#[inline]
32693#[target_feature(enable = "avx512f")]
32694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32695#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32696pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32697    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
32698}
32699
32700/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32701///
32702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
32703#[inline]
32704#[target_feature(enable = "avx512f")]
32705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32706#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32707pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32708    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32709}
32710
32711/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32712///
32713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
32714#[inline]
32715#[target_feature(enable = "avx512f,avx512vl")]
32716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32717#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32718pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32719    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
32720}
32721
32722/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32723///
32724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
32725#[inline]
32726#[target_feature(enable = "avx512f,avx512vl")]
32727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32728#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32729pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32730    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32731}
32732
32733/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32734///
32735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
32736#[inline]
32737#[target_feature(enable = "avx512f,avx512vl")]
32738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32739#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32740pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32741    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
32742}
32743
32744/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32745///
32746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
32747#[inline]
32748#[target_feature(enable = "avx512f,avx512vl")]
32749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32750#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32751pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32752    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32753}
32754
32755/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32756///
32757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
32758#[inline]
32759#[target_feature(enable = "avx512f")]
32760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32761#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32762pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32763    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
32764}
32765
32766/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32767///
32768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
32769#[inline]
32770#[target_feature(enable = "avx512f")]
32771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32772#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32773pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32774    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32775}
32776
32777/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32778///
32779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
32780#[inline]
32781#[target_feature(enable = "avx512f,avx512vl")]
32782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32783#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32784pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32785    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
32786}
32787
32788/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32789///
32790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
32791#[inline]
32792#[target_feature(enable = "avx512f,avx512vl")]
32793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32794#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32795pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32796    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32797}
32798
32799/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32800///
32801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
32802#[inline]
32803#[target_feature(enable = "avx512f,avx512vl")]
32804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32805#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32806pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32807    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
32808}
32809
32810/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32811///
32812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32813#[inline]
32814#[target_feature(enable = "avx512f,avx512vl")]
32815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32816#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32817pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32818    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32819}
32820
32821/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32822///
32823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32824#[inline]
32825#[target_feature(enable = "avx512f")]
32826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32827#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32828pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32829    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32830}
32831
32832/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32833///
32834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32835#[inline]
32836#[target_feature(enable = "avx512f")]
32837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32838#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32839pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32840    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32841}
32842
32843/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32844///
32845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32846#[inline]
32847#[target_feature(enable = "avx512f,avx512vl")]
32848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32849#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32850pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32851    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32852}
32853
32854/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32855///
32856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32857#[inline]
32858#[target_feature(enable = "avx512f,avx512vl")]
32859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32860#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32861pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32862    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32863}
32864
32865/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32866///
32867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32868#[inline]
32869#[target_feature(enable = "avx512f,avx512vl")]
32870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32871#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32872pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32873    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32874}
32875
32876/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32877///
32878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32879#[inline]
32880#[target_feature(enable = "avx512f,avx512vl")]
32881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32882#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32883pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32884    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32885}
32886
32887/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32888///
32889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
32890#[inline]
32891#[target_feature(enable = "avx512f")]
32892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32893#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32894pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32895    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32896}
32897
32898/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32899///
32900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32901#[inline]
32902#[target_feature(enable = "avx512f")]
32903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32904#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32905pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32906    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32907}
32908
32909/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32912#[inline]
32913#[target_feature(enable = "avx512f,avx512vl")]
32914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32916pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32917    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32918}
32919
32920/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32921///
32922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32923#[inline]
32924#[target_feature(enable = "avx512f,avx512vl")]
32925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32926#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32927pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32928    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32929}
32930
32931/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32932///
32933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32934#[inline]
32935#[target_feature(enable = "avx512f,avx512vl")]
32936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32937#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32938pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32939    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32940}
32941
32942/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32943///
32944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32945#[inline]
32946#[target_feature(enable = "avx512f,avx512vl")]
32947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32948#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32949pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32950    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32951}
32952
32953/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32954///
32955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
32956#[inline]
32957#[target_feature(enable = "avx512f")]
32958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32959#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32960pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32961    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32962}
32963
32964/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32965///
32966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32967#[inline]
32968#[target_feature(enable = "avx512f")]
32969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32970#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32971pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32972    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32973}
32974
32975/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32976///
32977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32978#[inline]
32979#[target_feature(enable = "avx512f,avx512vl")]
32980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32981#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32982pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32983    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32984}
32985
32986/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32987///
32988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32989#[inline]
32990#[target_feature(enable = "avx512f,avx512vl")]
32991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32992#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32993pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32994    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32995}
32996
32997/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32998///
32999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
33000#[inline]
33001#[target_feature(enable = "avx512f,avx512vl")]
33002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33003#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
33004pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33005    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
33006}
33007
33008/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33009///
33010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
33011#[inline]
33012#[target_feature(enable = "avx512f,avx512vl")]
33013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33014#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
33015pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33016    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
33017}
33018
33019/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33020///
33021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
33022#[inline]
33023#[target_feature(enable = "avx512f")]
33024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33025#[rustc_legacy_const_generics(2)]
33026#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33027pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
33028    unsafe {
33029        static_assert_uimm_bits!(IMM3, 3);
33030        let a = a.as_i64x8();
33031        let b = b.as_i64x8();
33032        let r = match IMM3 {
33033            0 => simd_eq(a, b),
33034            1 => simd_lt(a, b),
33035            2 => simd_le(a, b),
33036            3 => i64x8::ZERO,
33037            4 => simd_ne(a, b),
33038            5 => simd_ge(a, b),
33039            6 => simd_gt(a, b),
33040            _ => i64x8::splat(-1),
33041        };
33042        simd_bitmask(r)
33043    }
33044}
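
// Editorial usage sketch (not part of the stdarch sources): the three-bit IMM3
// predicate selects the comparison, so the named `_MM_CMPINT_*` constants can be
// passed directly; predicate 3 (FALSE) always yields 0 and predicate 7 (TRUE)
// always yields 0xFF. The `example_*` name is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmp_epi64_predicates(a: __m512i, b: __m512i) -> (__mmask8, __mmask8) {
    // Bit i of `le` is set when a[i] <= b[i] (signed); `ne` flags inequality.
    let le = _mm512_cmp_epi64_mask::<_MM_CMPINT_LE>(a, b);
    let ne = _mm512_cmp_epi64_mask::<_MM_CMPINT_NE>(a, b);
    (le, ne)
}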
33045
33046/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33047///
33048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
33049#[inline]
33050#[target_feature(enable = "avx512f")]
33051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33052#[rustc_legacy_const_generics(3)]
33053#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33054pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33055    k1: __mmask8,
33056    a: __m512i,
33057    b: __m512i,
33058) -> __mmask8 {
33059    unsafe {
33060        static_assert_uimm_bits!(IMM3, 3);
33061        let a = a.as_i64x8();
33062        let b = b.as_i64x8();
33063        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
33064        let r = match IMM3 {
33065            0 => simd_and(k1, simd_eq(a, b)),
33066            1 => simd_and(k1, simd_lt(a, b)),
33067            2 => simd_and(k1, simd_le(a, b)),
33068            3 => i64x8::ZERO,
33069            4 => simd_and(k1, simd_ne(a, b)),
33070            5 => simd_and(k1, simd_ge(a, b)),
33071            6 => simd_and(k1, simd_gt(a, b)),
33072            _ => k1,
33073        };
33074        simd_bitmask(r)
33075    }
33076}
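
// Editorial usage sketch (not part of the stdarch sources): as the body above
// shows, the masked form is the plain compare ANDed with `k1`, which also works
// on the integer level because `__mmask8` is an ordinary `u8`. The `example_*`
// name is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_cmp_epi64(k1: __mmask8, a: __m512i, b: __m512i) -> bool {
    let masked = _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b);
    let manual = _mm512_cmp_epi64_mask::<_MM_CMPINT_NLE>(a, b) & k1;
    masked == manual // always true
}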
33077
33078/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33079///
33080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
33081#[inline]
33082#[target_feature(enable = "avx512f,avx512vl")]
33083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33084#[rustc_legacy_const_generics(2)]
33085#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33086pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
33087    unsafe {
33088        static_assert_uimm_bits!(IMM3, 3);
33089        let a = a.as_i64x4();
33090        let b = b.as_i64x4();
33091        let r = match IMM3 {
33092            0 => simd_eq(a, b),
33093            1 => simd_lt(a, b),
33094            2 => simd_le(a, b),
33095            3 => i64x4::ZERO,
33096            4 => simd_ne(a, b),
33097            5 => simd_ge(a, b),
33098            6 => simd_gt(a, b),
33099            _ => i64x4::splat(-1),
33100        };
33101        simd_bitmask(r)
33102    }
33103}
33104
33105/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33106///
33107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
33108#[inline]
33109#[target_feature(enable = "avx512f,avx512vl")]
33110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33111#[rustc_legacy_const_generics(3)]
33112#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33113pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33114    k1: __mmask8,
33115    a: __m256i,
33116    b: __m256i,
33117) -> __mmask8 {
33118    unsafe {
33119        static_assert_uimm_bits!(IMM3, 3);
33120        let a = a.as_i64x4();
33121        let b = b.as_i64x4();
33122        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
33123        let r = match IMM3 {
33124            0 => simd_and(k1, simd_eq(a, b)),
33125            1 => simd_and(k1, simd_lt(a, b)),
33126            2 => simd_and(k1, simd_le(a, b)),
33127            3 => i64x4::ZERO,
33128            4 => simd_and(k1, simd_ne(a, b)),
33129            5 => simd_and(k1, simd_ge(a, b)),
33130            6 => simd_and(k1, simd_gt(a, b)),
33131            _ => k1,
33132        };
33133        simd_bitmask(r)
33134    }
33135}
33136
33137/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33138///
33139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
33140#[inline]
33141#[target_feature(enable = "avx512f,avx512vl")]
33142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33143#[rustc_legacy_const_generics(2)]
33144#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33145pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33146    unsafe {
33147        static_assert_uimm_bits!(IMM3, 3);
33148        let a = a.as_i64x2();
33149        let b = b.as_i64x2();
33150        let r = match IMM3 {
33151            0 => simd_eq(a, b),
33152            1 => simd_lt(a, b),
33153            2 => simd_le(a, b),
33154            3 => i64x2::ZERO,
33155            4 => simd_ne(a, b),
33156            5 => simd_ge(a, b),
33157            6 => simd_gt(a, b),
33158            _ => i64x2::splat(-1),
33159        };
33160        simd_bitmask(r)
33161    }
33162}
33163
33164/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33165///
33166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
33167#[inline]
33168#[target_feature(enable = "avx512f,avx512vl")]
33169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33170#[rustc_legacy_const_generics(3)]
33171#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33172pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33173    k1: __mmask8,
33174    a: __m128i,
33175    b: __m128i,
33176) -> __mmask8 {
33177    unsafe {
33178        static_assert_uimm_bits!(IMM3, 3);
33179        let a = a.as_i64x2();
33180        let b = b.as_i64x2();
33181        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
33182        let r = match IMM3 {
33183            0 => simd_and(k1, simd_eq(a, b)),
33184            1 => simd_and(k1, simd_lt(a, b)),
33185            2 => simd_and(k1, simd_le(a, b)),
33186            3 => i64x2::ZERO,
33187            4 => simd_and(k1, simd_ne(a, b)),
33188            5 => simd_and(k1, simd_ge(a, b)),
33189            6 => simd_and(k1, simd_gt(a, b)),
33190            _ => k1,
33191        };
33192        simd_bitmask(r)
33193    }
33194}
33195
33196/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
33197///
33198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
33199#[inline]
33200#[target_feature(enable = "avx512f")]
33201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33202pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33203    unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33204}
33205
33206/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33207///
33208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
33209#[inline]
33210#[target_feature(enable = "avx512f")]
33211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33212pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
33213    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33214}
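
// Editorial usage sketch (not part of the stdarch sources): lanes whose mask bit
// is clear are replaced with the additive identity 0 before the reduction, so
// only the selected lanes contribute to the sum. `example_*` is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_add_epi32() -> i32 {
    // Two active lanes of value 7 -> the masked sum is 14.
    let a = _mm512_set1_epi32(7);
    _mm512_mask_reduce_add_epi32(0b0000_0000_0000_0011, a)
}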
33215
33216/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
33217///
33218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
33219#[inline]
33220#[target_feature(enable = "avx512f")]
33221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33222pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33223    unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33224}
33225
33226/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33227///
33228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
33229#[inline]
33230#[target_feature(enable = "avx512f")]
33231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33232pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
33233    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33234}
33235
33236/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33237///
33238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
33239#[inline]
33240#[target_feature(enable = "avx512f")]
33241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33242pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
33243    unsafe {
33244        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33245        let a = _mm256_add_ps(
33246            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33247            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33248        );
33249        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33250        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33251        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
33252    }
33253}
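
// Editorial note (not part of the stdarch sources): the reduction above is a
// tree of pairwise adds (512 -> 256 -> 128 -> 64 -> scalar), so for f32 inputs
// the result may differ slightly from a strict left-to-right sum. `example_*`
// is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_reduce_add_ps() -> f32 {
    // All lanes are 0.5, so both the tree sum and a sequential sum give 8.0.
    _mm512_reduce_add_ps(_mm512_set1_ps(0.5))
}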
33254
33255/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33256///
33257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
33258#[inline]
33259#[target_feature(enable = "avx512f")]
33260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33261pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
33262    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
33263}
33264
33265/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33266///
33267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
33268#[inline]
33269#[target_feature(enable = "avx512f")]
33270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33271pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
33272    unsafe {
33273        let a = _mm256_add_pd(
33274            _mm512_extractf64x4_pd::<0>(a),
33275            _mm512_extractf64x4_pd::<1>(a),
33276        );
33277        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33278        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
33279    }
33280}
33281
33282/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33283///
33284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
33285#[inline]
33286#[target_feature(enable = "avx512f")]
33287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33288pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
33289    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
33290}
33291
33292/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
33293///
33294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
33295#[inline]
33296#[target_feature(enable = "avx512f")]
33297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33298pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33299    unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33300}
33301
33302/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33303///
33304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
33305#[inline]
33306#[target_feature(enable = "avx512f")]
33307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33308pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
33309    unsafe {
33310        simd_reduce_mul_unordered(simd_select_bitmask(
33311            k,
33312            a.as_i32x16(),
33313            _mm512_set1_epi32(1).as_i32x16(),
33314        ))
33315    }
33316}
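
// Editorial usage sketch (not part of the stdarch sources): inactive lanes are
// replaced with the multiplicative identity 1, mirroring how the masked sum uses
// 0. `example_*` is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_mul_epi32() -> i32 {
    // Three active lanes of value 3 -> the masked product is 27.
    let a = _mm512_set1_epi32(3);
    _mm512_mask_reduce_mul_epi32(0b0000_0000_0000_0111, a)
}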
33317
33318/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
33319///
33320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
33321#[inline]
33322#[target_feature(enable = "avx512f")]
33323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33324pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33325    unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33326}
33327
33328/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33329///
33330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
33331#[inline]
33332#[target_feature(enable = "avx512f")]
33333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33334pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
33335    unsafe {
33336        simd_reduce_mul_unordered(simd_select_bitmask(
33337            k,
33338            a.as_i64x8(),
33339            _mm512_set1_epi64(1).as_i64x8(),
33340        ))
33341    }
33342}
33343
33344/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33345///
33346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
33347#[inline]
33348#[target_feature(enable = "avx512f")]
33349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33350pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
33351    unsafe {
33352        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33353        let a = _mm256_mul_ps(
33354            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33355            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33356        );
33357        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33358        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33359        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
33360    }
33361}
33362
33363/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33364///
33365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
33366#[inline]
33367#[target_feature(enable = "avx512f")]
33368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33369pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
33370    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
33371}
33372
33373/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33374///
33375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
33376#[inline]
33377#[target_feature(enable = "avx512f")]
33378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33379pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
33380    unsafe {
33381        let a = _mm256_mul_pd(
33382            _mm512_extractf64x4_pd::<0>(a),
33383            _mm512_extractf64x4_pd::<1>(a),
33384        );
33385        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33386        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
33387    }
33388}
33389
33390/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33391///
33392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
33393#[inline]
33394#[target_feature(enable = "avx512f")]
33395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33396pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
33397    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
33398}
33399
33400/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33401///
33402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
33403#[inline]
33404#[target_feature(enable = "avx512f")]
33405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33406pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
33407    unsafe { simd_reduce_max(a.as_i32x16()) }
33408}
33409
33410/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33411///
33412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
33413#[inline]
33414#[target_feature(enable = "avx512f")]
33415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33416pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
33417    unsafe {
33418        simd_reduce_max(simd_select_bitmask(
33419            k,
33420            a.as_i32x16(),
33421            i32x16::splat(i32::MIN),
33422        ))
33423    }
33424}
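
// Editorial usage sketch (not part of the stdarch sources): inactive lanes are
// treated as i32::MIN, so a mask with no bits set makes the reduction return
// i32::MIN rather than any value from `a`. `example_*` is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_max_epi32() -> i32 {
    let a = _mm512_set1_epi32(42);
    _mm512_mask_reduce_max_epi32(0, a) // returns i32::MIN, not 42
}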
33425
33426/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33427///
33428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
33429#[inline]
33430#[target_feature(enable = "avx512f")]
33431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33432pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
33433    unsafe { simd_reduce_max(a.as_i64x8()) }
33434}
33435
33436/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33437///
33438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
33439#[inline]
33440#[target_feature(enable = "avx512f")]
33441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33442pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
33443    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
33444}
33445
33446/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33447///
33448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
33449#[inline]
33450#[target_feature(enable = "avx512f")]
33451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33452pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
33453    unsafe { simd_reduce_max(a.as_u32x16()) }
33454}
33455
33456/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33457///
33458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
33459#[inline]
33460#[target_feature(enable = "avx512f")]
33461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33462pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
33463    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
33464}
33465
33466/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33467///
33468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
33469#[inline]
33470#[target_feature(enable = "avx512f")]
33471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33472pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
33473    unsafe { simd_reduce_max(a.as_u64x8()) }
33474}
33475
33476/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33477///
33478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
33479#[inline]
33480#[target_feature(enable = "avx512f")]
33481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33482pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
33483    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
33484}
33485
33486/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33487///
33488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
33489#[inline]
33490#[target_feature(enable = "avx512f")]
33491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33492pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
33493    unsafe {
33494        let a = _mm256_max_ps(
33495            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33496            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33497        );
33498        let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33499        let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33500        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
33501    }
33502}
33503
33504/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33505///
33506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
33507#[inline]
33508#[target_feature(enable = "avx512f")]
33509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33510pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
33511    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
33512}
33513
33514/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33515///
33516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
33517#[inline]
33518#[target_feature(enable = "avx512f")]
33519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33520pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
33521    unsafe {
33522        let a = _mm256_max_pd(
33523            _mm512_extractf64x4_pd::<0>(a),
33524            _mm512_extractf64x4_pd::<1>(a),
33525        );
33526        let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33527        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
33528    }
33529}
33530
33531/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33532///
33533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
33534#[inline]
33535#[target_feature(enable = "avx512f")]
33536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33537pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
33538    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
33539}
33540
33541/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33542///
33543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
33544#[inline]
33545#[target_feature(enable = "avx512f")]
33546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33547pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
33548    unsafe { simd_reduce_min(a.as_i32x16()) }
33549}
33550
33551/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33552///
33553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
33554#[inline]
33555#[target_feature(enable = "avx512f")]
33556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33557pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
33558    unsafe {
33559        simd_reduce_min(simd_select_bitmask(
33560            k,
33561            a.as_i32x16(),
33562            i32x16::splat(i32::MAX),
33563        ))
33564    }
33565}
33566
33567/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33568///
33569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
33570#[inline]
33571#[target_feature(enable = "avx512f")]
33572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33573pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
33574    unsafe { simd_reduce_min(a.as_i64x8()) }
33575}
33576
33577/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33578///
33579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
33580#[inline]
33581#[target_feature(enable = "avx512f")]
33582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33583pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
33584    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
33585}
33586
33587/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33588///
33589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
33590#[inline]
33591#[target_feature(enable = "avx512f")]
33592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33593pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
33594    unsafe { simd_reduce_min(a.as_u32x16()) }
33595}
33596
33597/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33598///
33599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
33600#[inline]
33601#[target_feature(enable = "avx512f")]
33602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33603pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
33604    unsafe {
33605        simd_reduce_min(simd_select_bitmask(
33606            k,
33607            a.as_u32x16(),
33608            u32x16::splat(u32::MAX),
33609        ))
33610    }
33611}
33612
33613/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33614///
33615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
33616#[inline]
33617#[target_feature(enable = "avx512f")]
33618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33619pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
33620    unsafe { simd_reduce_min(a.as_u64x8()) }
33621}
33622
33623/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33624///
33625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4593)
33626#[inline]
33627#[target_feature(enable = "avx512f")]
33628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33629pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
33630    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
33631}
33632
33633/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33634///
33635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
33636#[inline]
33637#[target_feature(enable = "avx512f")]
33638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33639pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
33640    unsafe {
33641        let a = _mm256_min_ps(
33642            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33643            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33644        );
33645        let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33646        let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33647        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
33648    }
33649}
33650
33651/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33652///
33653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
33654#[inline]
33655#[target_feature(enable = "avx512f")]
33656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33657pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
33658    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
33659}
33660
33661/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33662///
33663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
33664#[inline]
33665#[target_feature(enable = "avx512f")]
33666#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33667pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
33668    unsafe {
33669        let a = _mm256_min_pd(
33670            _mm512_extractf64x4_pd::<0>(a),
33671            _mm512_extractf64x4_pd::<1>(a),
33672        );
33673        let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33674        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
33675    }
33676}
33677
33678/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33679///
33680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
33681#[inline]
33682#[target_feature(enable = "avx512f")]
33683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33684pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
33685    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
33686}
33687
33688/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33689///
33690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
33691#[inline]
33692#[target_feature(enable = "avx512f")]
33693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33694pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
33695    unsafe { simd_reduce_and(a.as_i32x16()) }
33696}
33697
33698/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
33701#[inline]
33702#[target_feature(enable = "avx512f")]
33703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33704pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
33705    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
33706}
33707
33708/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33709///
33710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
33711#[inline]
33712#[target_feature(enable = "avx512f")]
33713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33714pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
33715    unsafe { simd_reduce_and(a.as_i64x8()) }
33716}
33717
33718/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33719///
33720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4565)
33721#[inline]
33722#[target_feature(enable = "avx512f")]
33723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33724pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
33725    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
33726}
33727
33728/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33729///
33730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
33731#[inline]
33732#[target_feature(enable = "avx512f")]
33733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33734pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
33735    unsafe { simd_reduce_or(a.as_i32x16()) }
33736}
33737
33738/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33739///
33740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
33741#[inline]
33742#[target_feature(enable = "avx512f")]
33743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33744pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
33745    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33746}
33747
33748/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33749///
33750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
33751#[inline]
33752#[target_feature(enable = "avx512f")]
33753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33754pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
33755    unsafe { simd_reduce_or(a.as_i64x8()) }
33756}
33757
33758/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33759///
33760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
33761#[inline]
33762#[target_feature(enable = "avx512f")]
33763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33764pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
33765    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33766}
33767
33768/// Returns vector of type `__m512d` with indeterminate elements.
33769/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33770/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33771/// In practice, this is typically equivalent to [`mem::zeroed`].
33772///
33773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
33774#[inline]
33775#[target_feature(enable = "avx512f")]
33776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33777// This intrinsic has no corresponding instruction.
33778pub fn _mm512_undefined_pd() -> __m512d {
33779    unsafe { const { mem::zeroed() } }
33780}
33781
33782/// Returns vector of type `__m512` with indeterminate elements.
33783/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33784/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33785/// In practice, this is typically equivalent to [`mem::zeroed`].
33786///
33787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
33788#[inline]
33789#[target_feature(enable = "avx512f")]
33790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33791// This intrinsic has no corresponding instruction.
33792pub fn _mm512_undefined_ps() -> __m512 {
33793    unsafe { const { mem::zeroed() } }
33794}
33795
33796/// Returns vector of type `__m512i` with indeterminate elements.
33797/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33798/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33799/// In practice, this is typically equivalent to [`mem::zeroed`].
33800///
33801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
33802#[inline]
33803#[target_feature(enable = "avx512f")]
33804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33805// This intrinsic has no corresponding instruction.
33806pub fn _mm512_undefined_epi32() -> __m512i {
33807    unsafe { const { mem::zeroed() } }
33808}
33809
33810/// Returns vector of type `__m512` with indeterminate elements.
33811/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33812/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33813/// In practice, this is typically equivalent to [`mem::zeroed`].
33814///
33815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33816#[inline]
33817#[target_feature(enable = "avx512f")]
33818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33819// This intrinsic has no corresponding instruction.
33820pub fn _mm512_undefined() -> __m512 {
33821    unsafe { const { mem::zeroed() } }
33822}
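
// Editorial usage sketch (not part of the stdarch sources): the "undefined"
// vectors hold some valid but unspecified bit pattern, so they are only useful
// as placeholders that are fully overwritten before being read. `example_*` is
// illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_undefined_as_scratch() -> __m512 {
    let scratch = _mm512_undefined();
    // Every lane is overwritten, so the unspecified initial contents never leak.
    _mm512_mask_mov_ps(scratch, 0xFFFF, _mm512_setzero_ps())
}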
33823
33824/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33825///
33826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
33827#[inline]
33828#[target_feature(enable = "avx512f")]
33829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33830#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33831pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
33832    ptr::read_unaligned(mem_addr as *const __m512i)
33833}
33834
33835/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33836///
33837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
33838#[inline]
33839#[target_feature(enable = "avx512f,avx512vl")]
33840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33841#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33842pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
33843    ptr::read_unaligned(mem_addr as *const __m256i)
33844}
33845
33846/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33847///
33848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
33849#[inline]
33850#[target_feature(enable = "avx512f,avx512vl")]
33851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33852#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33853pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
33854    ptr::read_unaligned(mem_addr as *const __m128i)
33855}
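
// Usage sketch (editorial illustration, not from the original source): the
// unaligned loads accept any element pointer, regardless of alignment (assumes
// an `avx512f`-enabled context; the 256/128-bit forms additionally need
// `avx512vl`):
//
//     let data = [7i32; 16];
//     let v = unsafe { _mm512_loadu_epi32(data.as_ptr()) };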
33856
33857/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33858///
33859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
33860#[inline]
33861#[target_feature(enable = "avx512f")]
33862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33863#[cfg_attr(test, assert_instr(vpmovdw))]
33864pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
33865    vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k);
33866}
33867
33868/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33869///
33870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
33871#[inline]
33872#[target_feature(enable = "avx512f,avx512vl")]
33873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33874#[cfg_attr(test, assert_instr(vpmovdw))]
33875pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
33876    vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33877}
33878
33879/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33880///
33881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
33882#[inline]
33883#[target_feature(enable = "avx512f,avx512vl")]
33884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33885#[cfg_attr(test, assert_instr(vpmovdw))]
33886pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
33887    vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33888}
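
// Usage sketch (editorial illustration, not from the original source): the
// truncating store writes the low 16 bits of each *active* lane to memory and
// leaves inactive destination elements untouched (assumes an `avx512f`-enabled
// context):
//
//     let a = _mm512_set1_epi32(0x0001_0002);
//     let mut out = [0i16; 16];
//     unsafe { _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr(), 0b0101_0101_0101_0101, a) };
//     assert_eq!(out[0], 0x0002); // truncated, high half dropped
//     assert_eq!(out[1], 0);      // inactive lane: memory left unchanged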
33889
33890/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33891///
33892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
33893#[inline]
33894#[target_feature(enable = "avx512f")]
33895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33896#[cfg_attr(test, assert_instr(vpmovsdw))]
33897pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
33898    vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k);
33899}
33900
33901/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33902///
33903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
33904#[inline]
33905#[target_feature(enable = "avx512f,avx512vl")]
33906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33907#[cfg_attr(test, assert_instr(vpmovsdw))]
33908pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
33909    vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33910}
33911
33912/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33913///
33914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
33915#[inline]
33916#[target_feature(enable = "avx512f,avx512vl")]
33917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33918#[cfg_attr(test, assert_instr(vpmovsdw))]
33919pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
33920    vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33921}
33922
33923/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33924///
33925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
33926#[inline]
33927#[target_feature(enable = "avx512f")]
33928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33929#[cfg_attr(test, assert_instr(vpmovusdw))]
33930pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
33931    vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k);
33932}
33933
33934/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33935///
33936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
33937#[inline]
33938#[target_feature(enable = "avx512f,avx512vl")]
33939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33940#[cfg_attr(test, assert_instr(vpmovusdw))]
33941pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
33942    vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33943}
33944
33945/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33946///
33947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
33948#[inline]
33949#[target_feature(enable = "avx512f,avx512vl")]
33950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33951#[cfg_attr(test, assert_instr(vpmovusdw))]
33952pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
33953    vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33954}
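
// Usage sketch (editorial illustration, not from the original source): unlike the
// plain truncating form, the `cvtsepi32`/`cvtusepi32` stores clamp out-of-range
// values (assumes an `avx512f`-enabled context):
//
//     let a = _mm512_set1_epi32(70_000); // does not fit in 16 bits
//     let mut s = [0i16; 16];
//     let mut u = [0u16; 16];
//     unsafe {
//         _mm512_mask_cvtsepi32_storeu_epi16(s.as_mut_ptr(), 0xffff, a);
//         _mm512_mask_cvtusepi32_storeu_epi16(u.as_mut_ptr() as *mut i16, 0xffff, a);
//     }
//     assert_eq!(s[0], i16::MAX); // signed saturation
//     assert_eq!(u[0], u16::MAX); // unsigned saturation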
33955
33956/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33957///
33958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
33959#[inline]
33960#[target_feature(enable = "avx512f")]
33961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33962#[cfg_attr(test, assert_instr(vpmovdb))]
33963pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33964    vpmovdbmem(mem_addr, a.as_i32x16(), k);
33965}
33966
33967/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33968///
33969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
33970#[inline]
33971#[target_feature(enable = "avx512f,avx512vl")]
33972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33973#[cfg_attr(test, assert_instr(vpmovdb))]
33974pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33975    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
33976}
33977
33978/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33979///
33980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
33981#[inline]
33982#[target_feature(enable = "avx512f,avx512vl")]
33983#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33984#[cfg_attr(test, assert_instr(vpmovdb))]
33985pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33986    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
33987}
33988
33989/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33990///
33991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
33992#[inline]
33993#[target_feature(enable = "avx512f")]
33994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33995#[cfg_attr(test, assert_instr(vpmovsdb))]
33996pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33997    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
33998}
33999
34000/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34001///
34002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
34003#[inline]
34004#[target_feature(enable = "avx512f,avx512vl")]
34005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34006#[cfg_attr(test, assert_instr(vpmovsdb))]
34007pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34008    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
34009}
34010
34011/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34012///
34013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
34014#[inline]
34015#[target_feature(enable = "avx512f,avx512vl")]
34016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34017#[cfg_attr(test, assert_instr(vpmovsdb))]
34018pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34019    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
34020}
34021
34022/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34023///
34024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
34025#[inline]
34026#[target_feature(enable = "avx512f")]
34027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34028#[cfg_attr(test, assert_instr(vpmovusdb))]
34029pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
34030    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
34031}
34032
34033/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34034///
34035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
34036#[inline]
34037#[target_feature(enable = "avx512f,avx512vl")]
34038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34039#[cfg_attr(test, assert_instr(vpmovusdb))]
34040pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34041    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
34042}
34043
34044/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34045///
34046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
34047#[inline]
34048#[target_feature(enable = "avx512f,avx512vl")]
34049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34050#[cfg_attr(test, assert_instr(vpmovusdb))]
34051pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34052    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
34053}
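
// Usage sketch (editorial illustration, not from the original source): the
// byte-narrowing stores follow the same pattern, writing one byte per active
// lane (assumes an `avx512f`-enabled context):
//
//     let a = _mm512_set1_epi32(300);
//     let mut out = [0u8; 16];
//     unsafe { _mm512_mask_cvtusepi32_storeu_epi8(out.as_mut_ptr() as *mut i8, 0xffff, a) };
//     assert_eq!(out[0], 255); // 300 saturates to u8::MAX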
34054
34055/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34056///
34057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
34058#[inline]
34059#[target_feature(enable = "avx512f")]
34060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34061#[cfg_attr(test, assert_instr(vpmovqw))]
34062pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34063    vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k);
34064}
34065
34066/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34067///
34068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
34069#[inline]
34070#[target_feature(enable = "avx512f,avx512vl")]
34071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34072#[cfg_attr(test, assert_instr(vpmovqw))]
34073pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34074    vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34075}
34076
34077/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34078///
34079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
34080#[inline]
34081#[target_feature(enable = "avx512f,avx512vl")]
34082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34083#[cfg_attr(test, assert_instr(vpmovqw))]
34084pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34085    vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34086}
34087
34088/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34089///
34090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
34091#[inline]
34092#[target_feature(enable = "avx512f")]
34093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34094#[cfg_attr(test, assert_instr(vpmovsqw))]
34095pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34096    vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k);
34097}
34098
34099/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34100///
34101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
34102#[inline]
34103#[target_feature(enable = "avx512f,avx512vl")]
34104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34105#[cfg_attr(test, assert_instr(vpmovsqw))]
34106pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34107    vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34108}
34109
34110/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34111///
34112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
34113#[inline]
34114#[target_feature(enable = "avx512f,avx512vl")]
34115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34116#[cfg_attr(test, assert_instr(vpmovsqw))]
34117pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34118    vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34119}
34120
34121/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34122///
34123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
34124#[inline]
34125#[target_feature(enable = "avx512f")]
34126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34127#[cfg_attr(test, assert_instr(vpmovusqw))]
34128pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34129    vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), k);
34130}
34131
34132/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34133///
34134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
34135#[inline]
34136#[target_feature(enable = "avx512f,avx512vl")]
34137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34138#[cfg_attr(test, assert_instr(vpmovusqw))]
34139pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34140    vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34141}
34142
34143/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34144///
34145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
34146#[inline]
34147#[target_feature(enable = "avx512f,avx512vl")]
34148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34149#[cfg_attr(test, assert_instr(vpmovusqw))]
34150pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34151    vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34152}
34153
34154/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34155///
34156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
34157#[inline]
34158#[target_feature(enable = "avx512f")]
34159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34160#[cfg_attr(test, assert_instr(vpmovqb))]
34161pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34162    vpmovqbmem(mem_addr, a.as_i64x8(), k);
34163}
34164
34165/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34166///
34167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
34168#[inline]
34169#[target_feature(enable = "avx512f,avx512vl")]
34170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34171#[cfg_attr(test, assert_instr(vpmovqb))]
34172pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34173    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
34174}
34175
34176/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34177///
34178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
34179#[inline]
34180#[target_feature(enable = "avx512f,avx512vl")]
34181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34182#[cfg_attr(test, assert_instr(vpmovqb))]
34183pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34184    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
34185}
34186
34187/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34188///
34189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
34190#[inline]
34191#[target_feature(enable = "avx512f")]
34192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34193#[cfg_attr(test, assert_instr(vpmovsqb))]
34194pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34195    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
34196}
34197
34198/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34199///
34200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
34201#[inline]
34202#[target_feature(enable = "avx512f,avx512vl")]
34203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34204#[cfg_attr(test, assert_instr(vpmovsqb))]
34205pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34206    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
34207}
34208
34209/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34210///
34211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
34212#[inline]
34213#[target_feature(enable = "avx512f,avx512vl")]
34214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34215#[cfg_attr(test, assert_instr(vpmovsqb))]
34216pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34217    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
34218}
34219
34220/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34221///
34222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
34223#[inline]
34224#[target_feature(enable = "avx512f")]
34225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34226#[cfg_attr(test, assert_instr(vpmovusqb))]
34227pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34228    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
34229}
34230
34231/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34232///
34233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
34234#[inline]
34235#[target_feature(enable = "avx512f,avx512vl")]
34236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34237#[cfg_attr(test, assert_instr(vpmovusqb))]
34238pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34239    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
34240}
34241
34242/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34243///
34244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
34245#[inline]
34246#[target_feature(enable = "avx512f,avx512vl")]
34247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34248#[cfg_attr(test, assert_instr(vpmovusqb))]
34249pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34250    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
34251}
34252
34253/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34254///
34255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
34256#[inline]
34257#[target_feature(enable = "avx512f")]
34258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34259#[cfg_attr(test, assert_instr(vpmovqd))]
34260pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
34261    vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k);
34262}
34263
34264/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34265///
34266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
34267#[inline]
34268#[target_feature(enable = "avx512f,avx512vl")]
34269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34270#[cfg_attr(test, assert_instr(vpmovqd))]
34271pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
34272    vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k);
34273}
34274
34275/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34276///
34277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
34278#[inline]
34279#[target_feature(enable = "avx512f,avx512vl")]
34280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34281#[cfg_attr(test, assert_instr(vpmovqd))]
34282pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
34283    vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k);
34284}
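
// Usage sketch (editorial illustration, not from the original source): the
// quadword-to-doubleword store simply drops the upper 32 bits of each active
// lane (assumes an `avx512f`-enabled context):
//
//     let a = _mm512_set1_epi64(0x1_0000_0005);
//     let mut out = [0i32; 8];
//     unsafe { _mm512_mask_cvtepi64_storeu_epi32(out.as_mut_ptr(), 0b1111_1111, a) };
//     assert_eq!(out, [5; 8]);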
34285
34286/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34287///
34288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
34289#[inline]
34290#[target_feature(enable = "avx512f")]
34291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34292#[cfg_attr(test, assert_instr(vpmovsqd))]
34293pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
34294    vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k);
34295}
34296
34297/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34298///
34299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
34300#[inline]
34301#[target_feature(enable = "avx512f,avx512vl")]
34302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34303#[cfg_attr(test, assert_instr(vpmovsqd))]
34304pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
34305    vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k);
34306}
34307
34308/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34309///
34310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
34311#[inline]
34312#[target_feature(enable = "avx512f,avx512vl")]
34313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34314#[cfg_attr(test, assert_instr(vpmovsqd))]
34315pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
34316    vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k);
34317}
34318
34319/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34320///
34321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
34322#[inline]
34323#[target_feature(enable = "avx512f")]
34324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34325#[cfg_attr(test, assert_instr(vpmovusqd))]
34326pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
34327    vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k);
34328}
34329
34330/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34331///
34332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
34333#[inline]
34334#[target_feature(enable = "avx512f,avx512vl")]
34335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34336#[cfg_attr(test, assert_instr(vpmovusqd))]
34337pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
34338    vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k);
34339}
34340
34341/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34342///
34343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
34344#[inline]
34345#[target_feature(enable = "avx512f,avx512vl")]
34346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34347#[cfg_attr(test, assert_instr(vpmovusqd))]
34348pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
34349    vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k);
34350}
34351
34352/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34353///
34354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
34355#[inline]
34356#[target_feature(enable = "avx512f")]
34357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34358#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34359pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
34360    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34361}
34362
34363/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34364///
34365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
34366#[inline]
34367#[target_feature(enable = "avx512f,avx512vl")]
34368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34369#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34370pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
34371    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34372}
34373
34374/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34375///
34376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
34377#[inline]
34378#[target_feature(enable = "avx512f,avx512vl")]
34379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34380#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34381pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
34382    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34383}
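
// Usage sketch (editorial illustration, not from the original source): an
// unaligned load/store round trip through a `__m512i` register (assumes an
// `avx512f`-enabled context):
//
//     let src = [3i32; 16];
//     let mut dst = [0i32; 16];
//     unsafe {
//         let v = _mm512_loadu_epi32(src.as_ptr());
//         _mm512_storeu_epi32(dst.as_mut_ptr(), v);
//     }
//     assert_eq!(src, dst);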
34384
34385/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34386///
34387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
34388#[inline]
34389#[target_feature(enable = "avx512f")]
34390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34391#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34392pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
34393    ptr::read_unaligned(mem_addr as *const __m512i)
34394}
34395
34396/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34397///
34398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
34399#[inline]
34400#[target_feature(enable = "avx512f,avx512vl")]
34401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34402#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34403pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
34404    ptr::read_unaligned(mem_addr as *const __m256i)
34405}
34406
34407/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34408///
34409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
34410#[inline]
34411#[target_feature(enable = "avx512f,avx512vl")]
34412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34413#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34414pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
34415    ptr::read_unaligned(mem_addr as *const __m128i)
34416}
34417
34418/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34419///
34420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
34421#[inline]
34422#[target_feature(enable = "avx512f")]
34423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34424#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34425pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
34426    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34427}
34428
34429/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34430///
34431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
34432#[inline]
34433#[target_feature(enable = "avx512f,avx512vl")]
34434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34435#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34436pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
34437    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34438}
34439
34440/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34441///
34442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
34443#[inline]
34444#[target_feature(enable = "avx512f,avx512vl")]
34445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34446#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34447pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
34448    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34449}
34450
34451/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34452///
34453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
34454#[inline]
34455#[target_feature(enable = "avx512f")]
34456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34457#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34458pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
34459    ptr::read_unaligned(mem_addr)
34460}
34461
34462/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
34463///
34464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
34465#[inline]
34466#[target_feature(enable = "avx512f")]
34467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34468#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34469pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
34470    ptr::write_unaligned(mem_addr, a);
34471}
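
// Usage sketch (editorial illustration, not from the original source): note that
// the `si512` variants take a `*const __m512i`/`*mut __m512i` rather than an
// element pointer, so other pointer types must be cast explicitly (assumes an
// `avx512f`-enabled context):
//
//     let bytes = [0u8; 64];
//     let v = unsafe { _mm512_loadu_si512(bytes.as_ptr() as *const __m512i) };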
34472
34473/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
34474/// floating-point elements) from memory into result.
34475/// `mem_addr` does not need to be aligned on any particular boundary.
34476///
34477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
34478#[inline]
34479#[target_feature(enable = "avx512f")]
34480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34481#[cfg_attr(test, assert_instr(vmovups))]
34482pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
34483    ptr::read_unaligned(mem_addr as *const __m512d)
34484}
34485
34486/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
34487/// floating-point elements) from `a` into memory.
34488/// `mem_addr` does not need to be aligned on any particular boundary.
34489///
34490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
34491#[inline]
34492#[target_feature(enable = "avx512f")]
34493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34494#[cfg_attr(test, assert_instr(vmovups))]
34495pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
34496    ptr::write_unaligned(mem_addr as *mut __m512d, a);
34497}
34498
34499/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
34500/// floating-point elements) from memory into result.
34501/// `mem_addr` does not need to be aligned on any particular boundary.
34502///
34503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
34504#[inline]
34505#[target_feature(enable = "avx512f")]
34506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34507#[cfg_attr(test, assert_instr(vmovups))]
34508pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
34509    ptr::read_unaligned(mem_addr as *const __m512)
34510}
34511
34512/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
34513/// floating-point elements) from `a` into memory.
34514/// `mem_addr` does not need to be aligned on any particular boundary.
34515///
34516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
34517#[inline]
34518#[target_feature(enable = "avx512f")]
34519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34520#[cfg_attr(test, assert_instr(vmovups))]
34521pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
34522    ptr::write_unaligned(mem_addr as *mut __m512, a);
34523}
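
// Usage sketch (editorial illustration, not from the original source): unaligned
// floating-point stores work directly on element pointers (assumes an
// `avx512f`-enabled context):
//
//     let mut out = [0.0f32; 16];
//     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), _mm512_set1_ps(1.5)) };
//     assert_eq!(out, [1.5; 16]);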
34524
34525/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34526///
34527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
34528#[inline]
34529#[target_feature(enable = "avx512f")]
34530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34531#[cfg_attr(
34532    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34533    assert_instr(vmovaps)
34534)] //should be vmovdqa32
34535pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
34536    ptr::read(mem_addr)
34537}
34538
34539/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34540///
34541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
34542#[inline]
34543#[target_feature(enable = "avx512f")]
34544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34545#[cfg_attr(
34546    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34547    assert_instr(vmovaps)
34548)] //should be vmovdqa32
34549pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
34550    ptr::write(mem_addr, a);
34551}
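
// Usage sketch (editorial illustration, not from the original source): the aligned
// forms require a 64-byte-aligned address, which can be guaranteed with
// `#[repr(align(64))]` (assumes an `avx512f`-enabled context):
//
//     #[repr(align(64))]
//     struct Aligned([i32; 16]);
//
//     let buf = Aligned([0; 16]);
//     let v = unsafe { _mm512_load_si512(buf.0.as_ptr() as *const __m512i) };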
34552
34553/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34554///
34555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
34556#[inline]
34557#[target_feature(enable = "avx512f")]
34558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34559#[cfg_attr(
34560    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34561    assert_instr(vmovaps)
34562)] //should be vmovdqa32
34563pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
34564    ptr::read(mem_addr as *const __m512i)
34565}
34566
34567/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34568///
34569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
34570#[inline]
34571#[target_feature(enable = "avx512f,avx512vl")]
34572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34573#[cfg_attr(
34574    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34575    assert_instr(vmovaps)
34576)] //should be vmovdqa32
34577pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
34578    ptr::read(mem_addr as *const __m256i)
34579}
34580
34581/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34582///
34583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
34584#[inline]
34585#[target_feature(enable = "avx512f,avx512vl")]
34586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34587#[cfg_attr(
34588    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34589    assert_instr(vmovaps)
34590)] //should be vmovdqa32
34591pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
34592    ptr::read(mem_addr as *const __m128i)
34593}
34594
34595/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34596///
34597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
34598#[inline]
34599#[target_feature(enable = "avx512f")]
34600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34601#[cfg_attr(
34602    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34603    assert_instr(vmovaps)
34604)] //should be vmovdqa32
34605pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
34606    ptr::write(mem_addr as *mut __m512i, a);
34607}
34608
34609/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34610///
34611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
34612#[inline]
34613#[target_feature(enable = "avx512f,avx512vl")]
34614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34615#[cfg_attr(
34616    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34617    assert_instr(vmovaps)
34618)] //should be vmovdqa32
34619pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
34620    ptr::write(mem_addr as *mut __m256i, a);
34621}
34622
34623/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34624///
34625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
34626#[inline]
34627#[target_feature(enable = "avx512f,avx512vl")]
34628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34629#[cfg_attr(
34630    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34631    assert_instr(vmovaps)
34632)] //should be vmovdqa32
34633pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
34634    ptr::write(mem_addr as *mut __m128i, a);
34635}
34636
34637/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34638///
34639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
34640#[inline]
34641#[target_feature(enable = "avx512f")]
34642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34643#[cfg_attr(
34644    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34645    assert_instr(vmovaps)
34646)] //should be vmovdqa64
34647pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
34648    ptr::read(mem_addr as *const __m512i)
34649}
34650
34651/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34652///
34653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
34654#[inline]
34655#[target_feature(enable = "avx512f,avx512vl")]
34656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34657#[cfg_attr(
34658    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34659    assert_instr(vmovaps)
34660)] //should be vmovdqa64
34661pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
34662    ptr::read(mem_addr as *const __m256i)
34663}
34664
34665/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34666///
34667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
34668#[inline]
34669#[target_feature(enable = "avx512f,avx512vl")]
34670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34671#[cfg_attr(
34672    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34673    assert_instr(vmovaps)
34674)] //should be vmovdqa64
34675pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
34676    ptr::read(mem_addr as *const __m128i)
34677}
34678
34679/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34680///
34681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
34682#[inline]
34683#[target_feature(enable = "avx512f")]
34684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34685#[cfg_attr(
34686    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34687    assert_instr(vmovaps)
34688)] //should be vmovdqa64
34689pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
34690    ptr::write(mem_addr as *mut __m512i, a);
34691}
34692
34693/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34694///
34695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
34696#[inline]
34697#[target_feature(enable = "avx512f,avx512vl")]
34698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34699#[cfg_attr(
34700    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34701    assert_instr(vmovaps)
34702)] //should be vmovdqa64
34703pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
34704    ptr::write(mem_addr as *mut __m256i, a);
34705}
34706
34707/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34708///
34709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
34710#[inline]
34711#[target_feature(enable = "avx512f,avx512vl")]
34712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34713#[cfg_attr(
34714    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34715    assert_instr(vmovaps)
34716)] //should be vmovdqa64
34717pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
34718    ptr::write(mem_addr as *mut __m128i, a);
34719}
34720
34721/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34722///
34723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
34724#[inline]
34725#[target_feature(enable = "avx512f")]
34726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34727#[cfg_attr(
34728    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34729    assert_instr(vmovaps)
34730)]
34731pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
34732    ptr::read(mem_addr as *const __m512)
34733}
34734
34735/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34736///
34737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
34738#[inline]
34739#[target_feature(enable = "avx512f")]
34740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34741#[cfg_attr(
34742    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34743    assert_instr(vmovaps)
34744)]
34745pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
34746    ptr::write(mem_addr as *mut __m512, a);
34747}
34748
34749/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34750///
34751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
34752#[inline]
34753#[target_feature(enable = "avx512f")]
34754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34755#[cfg_attr(
34756    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34757    assert_instr(vmovaps)
34758)] //should be vmovapd
34759pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
34760    ptr::read(mem_addr as *const __m512d)
34761}
34762
34763/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34764///
34765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
34766#[inline]
34767#[target_feature(enable = "avx512f")]
34768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34769#[cfg_attr(
34770    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34771    assert_instr(vmovaps)
34772)] //should be vmovapd
34773pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
34774    ptr::write(mem_addr as *mut __m512d, a);
34775}
34776
34777/// Load packed 32-bit integers from memory into dst using writemask k
34778/// (elements are copied from src when the corresponding mask bit is not set).
34779/// mem_addr does not need to be aligned on any particular boundary.
34780///
34781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
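///
/// A minimal usage sketch (illustrative only; assumes the `avx512f` target feature
/// has already been verified at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [i32; 16] = [5; 16];
/// unsafe {
///     let src = _mm512_set1_epi32(-1);
///     // Only the lanes whose mask bit is set are loaded from `data`;
///     // the remaining lanes keep the value from `src` (-1 here).
///     let r = _mm512_mask_loadu_epi32(src, 0b0000_0000_1111_1111, data.as_ptr());
/// }
/// ```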
34782#[inline]
34783#[target_feature(enable = "avx512f")]
34784#[cfg_attr(test, assert_instr(vmovdqu32))]
34785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34786pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
34787    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
34788}
34789
34790/// Load packed 32-bit integers from memory into dst using zeromask k
34791/// (elements are zeroed out when the corresponding mask bit is not set).
34792/// mem_addr does not need to be aligned on any particular boundary.
34793///
34794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
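///
/// An illustrative sketch of the zeromask variant (assumes AVX-512F support has
/// already been verified at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [i32; 16] = [7; 16];
/// unsafe {
///     // Lanes 0..8 are loaded from `data`; lanes 8..16 are zeroed.
///     let r = _mm512_maskz_loadu_epi32(0b0000_0000_1111_1111, data.as_ptr());
/// }
/// ```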
34795#[inline]
34796#[target_feature(enable = "avx512f")]
34797#[cfg_attr(test, assert_instr(vmovdqu32))]
34798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34799pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
34800    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
34801}
34802
34803/// Load packed 64-bit integers from memory into dst using writemask k
34804/// (elements are copied from src when the corresponding mask bit is not set).
34805/// mem_addr does not need to be aligned on any particular boundary.
34806///
34807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
34808#[inline]
34809#[target_feature(enable = "avx512f")]
34810#[cfg_attr(test, assert_instr(vmovdqu64))]
34811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34812pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
34813    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
34814}
34815
34816/// Load packed 64-bit integers from memory into dst using zeromask k
34817/// (elements are zeroed out when the corresponding mask bit is not set).
34818/// mem_addr does not need to be aligned on any particular boundary.
34819///
34820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
34821#[inline]
34822#[target_feature(enable = "avx512f")]
34823#[cfg_attr(test, assert_instr(vmovdqu64))]
34824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34825pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
34826    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
34827}
34828
34829/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34830/// (elements are copied from src when the corresponding mask bit is not set).
34831/// mem_addr does not need to be aligned on any particular boundary.
34832///
34833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
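///
/// A minimal usage sketch (illustrative only; assumes AVX-512F support has already
/// been verified at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [f32; 16] = [2.5; 16];
/// unsafe {
///     let src = _mm512_set1_ps(0.0);
///     // Even lanes come from memory; odd lanes keep 0.0 from `src`.
///     let r = _mm512_mask_loadu_ps(src, 0b0101_0101_0101_0101, data.as_ptr());
/// }
/// ```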
34834#[inline]
34835#[target_feature(enable = "avx512f")]
34836#[cfg_attr(test, assert_instr(vmovups))]
34837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34838pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
34839    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
34840}
34841
34842/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34843/// (elements are zeroed out when the corresponding mask bit is not set).
34844/// mem_addr does not need to be aligned on any particular boundary.
34845///
34846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
34847#[inline]
34848#[target_feature(enable = "avx512f")]
34849#[cfg_attr(test, assert_instr(vmovups))]
34850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34851pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
34852    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
34853}
34854
34855/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34856/// (elements are copied from src when the corresponding mask bit is not set).
34857/// mem_addr does not need to be aligned on any particular boundary.
34858///
34859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
34860#[inline]
34861#[target_feature(enable = "avx512f")]
34862#[cfg_attr(test, assert_instr(vmovupd))]
34863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34864pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
34865    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
34866}
34867
34868/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34869/// (elements are zeroed out when the corresponding mask bit is not set).
34870/// mem_addr does not need to be aligned on any particular boundary.
34871///
34872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
34873#[inline]
34874#[target_feature(enable = "avx512f")]
34875#[cfg_attr(test, assert_instr(vmovupd))]
34876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34877pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
34878    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
34879}
34880
34881/// Load packed 32-bit integers from memory into dst using writemask k
34882/// (elements are copied from src when the corresponding mask bit is not set).
34883/// mem_addr does not need to be aligned on any particular boundary.
34884///
34885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
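///
/// A brief sketch of the 256-bit variant (illustrative only; assumes the `avx512f`
/// and `avx512vl` target features have already been verified at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
/// unsafe {
///     let src = _mm256_set1_epi32(0);
///     // Only the low four lanes are loaded; the upper four keep 0 from `src`.
///     let r = _mm256_mask_loadu_epi32(src, 0b0000_1111, data.as_ptr());
/// }
/// ```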
34886#[inline]
34887#[target_feature(enable = "avx512f,avx512vl")]
34888#[cfg_attr(test, assert_instr(vmovdqu32))]
34889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34890pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
34891    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
34892}
34893
34894/// Load packed 32-bit integers from memory into dst using zeromask k
34895/// (elements are zeroed out when the corresponding mask bit is not set).
34896/// mem_addr does not need to be aligned on any particular boundary.
34897///
34898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
34899#[inline]
34900#[target_feature(enable = "avx512f,avx512vl")]
34901#[cfg_attr(test, assert_instr(vmovdqu32))]
34902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34903pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
34904    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
34905}
34906
34907/// Load packed 64-bit integers from memory into dst using writemask k
34908/// (elements are copied from src when the corresponding mask bit is not set).
34909/// mem_addr does not need to be aligned on any particular boundary.
34910///
34911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
34912#[inline]
34913#[target_feature(enable = "avx512f,avx512vl")]
34914#[cfg_attr(test, assert_instr(vmovdqu64))]
34915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34916pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
34917    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
34918}
34919
34920/// Load packed 64-bit integers from memory into dst using zeromask k
34921/// (elements are zeroed out when the corresponding mask bit is not set).
34922/// mem_addr does not need to be aligned on any particular boundary.
34923///
34924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
34925#[inline]
34926#[target_feature(enable = "avx512f,avx512vl")]
34927#[cfg_attr(test, assert_instr(vmovdqu64))]
34928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34929pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
34930    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
34931}
34932
34933/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34934/// (elements are copied from src when the corresponding mask bit is not set).
34935/// mem_addr does not need to be aligned on any particular boundary.
34936///
34937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
34938#[inline]
34939#[target_feature(enable = "avx512f,avx512vl")]
34940#[cfg_attr(test, assert_instr(vmovups))]
34941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34942pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
34943    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
34944}
34945
34946/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34947/// (elements are zeroed out when the corresponding mask bit is not set).
34948/// mem_addr does not need to be aligned on any particular boundary.
34949///
34950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
34951#[inline]
34952#[target_feature(enable = "avx512f,avx512vl")]
34953#[cfg_attr(test, assert_instr(vmovups))]
34954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34955pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
34956    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
34957}
34958
34959/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34960/// (elements are copied from src when the corresponding mask bit is not set).
34961/// mem_addr does not need to be aligned on any particular boundary.
34962///
34963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
34964#[inline]
34965#[target_feature(enable = "avx512f,avx512vl")]
34966#[cfg_attr(test, assert_instr(vmovupd))]
34967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34968pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
34969    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
34970}
34971
34972/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34973/// (elements are zeroed out when the corresponding mask bit is not set).
34974/// mem_addr does not need to be aligned on any particular boundary.
34975///
34976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
34977#[inline]
34978#[target_feature(enable = "avx512f,avx512vl")]
34979#[cfg_attr(test, assert_instr(vmovupd))]
34980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34981pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
34982    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
34983}
34984
34985/// Load packed 32-bit integers from memory into dst using writemask k
34986/// (elements are copied from src when the corresponding mask bit is not set).
34987/// mem_addr does not need to be aligned on any particular boundary.
34988///
34989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
34990#[inline]
34991#[target_feature(enable = "avx512f,avx512vl")]
34992#[cfg_attr(test, assert_instr(vmovdqu32))]
34993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34994pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
34995    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
34996}
34997
34998/// Load packed 32-bit integers from memory into dst using zeromask k
34999/// (elements are zeroed out when the corresponding mask bit is not set).
35000/// mem_addr does not need to be aligned on any particular boundary.
35001///
35002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
35003#[inline]
35004#[target_feature(enable = "avx512f,avx512vl")]
35005#[cfg_attr(test, assert_instr(vmovdqu32))]
35006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35007pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35008    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
35009}
35010
35011/// Load packed 64-bit integers from memory into dst using writemask k
35012/// (elements are copied from src when the corresponding mask bit is not set).
35013/// mem_addr does not need to be aligned on any particular boundary.
35014///
35015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
35016#[inline]
35017#[target_feature(enable = "avx512f,avx512vl")]
35018#[cfg_attr(test, assert_instr(vmovdqu64))]
35019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35020pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
35021    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
35022}
35023
35024/// Load packed 64-bit integers from memory into dst using zeromask k
35025/// (elements are zeroed out when the corresponding mask bit is not set).
35026/// mem_addr does not need to be aligned on any particular boundary.
35027///
35028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
35029#[inline]
35030#[target_feature(enable = "avx512f,avx512vl")]
35031#[cfg_attr(test, assert_instr(vmovdqu64))]
35032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35033pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35034    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
35035}
35036
35037/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35038/// (elements are copied from src when the corresponding mask bit is not set).
35039/// mem_addr does not need to be aligned on any particular boundary.
35040///
35041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
35042#[inline]
35043#[target_feature(enable = "avx512f,avx512vl")]
35044#[cfg_attr(test, assert_instr(vmovups))]
35045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35046pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35047    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
35048}
35049
35050/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35051/// (elements are zeroed out when the corresponding mask bit is not set).
35052/// mem_addr does not need to be aligned on any particular boundary.
35053///
35054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
35055#[inline]
35056#[target_feature(enable = "avx512f,avx512vl")]
35057#[cfg_attr(test, assert_instr(vmovups))]
35058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35059pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35060    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
35061}
35062
35063/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35064/// (elements are copied from src when the corresponding mask bit is not set).
35065/// mem_addr does not need to be aligned on any particular boundary.
35066///
35067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
35068#[inline]
35069#[target_feature(enable = "avx512f,avx512vl")]
35070#[cfg_attr(test, assert_instr(vmovupd))]
35071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35072pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35073    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
35074}
35075
35076/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35077/// (elements are zeroed out when the corresponding mask bit is not set).
35078/// mem_addr does not need to be aligned on any particular boundary.
35079///
35080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
35081#[inline]
35082#[target_feature(enable = "avx512f,avx512vl")]
35083#[cfg_attr(test, assert_instr(vmovupd))]
35084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35085pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35086    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
35087}
35088
35089/// Load packed 32-bit integers from memory into dst using writemask k
35090/// (elements are copied from src when the corresponding mask bit is not set).
35091/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35092///
35093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
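///
/// A minimal usage sketch of the aligned, masked load (illustrative only; assumes
/// AVX-512F support has been verified at runtime, and `Aligned64` is a hypothetical
/// wrapper used only to guarantee the 64-byte alignment):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // 64-byte aligned source buffer.
/// #[repr(align(64))]
/// struct Aligned64([i32; 16]);
///
/// let data = Aligned64([3; 16]);
/// unsafe {
///     let src = _mm512_set1_epi32(-1);
///     // The high eight lanes are loaded from `data`; the low eight keep -1.
///     let r = _mm512_mask_load_epi32(src, 0xFF00, data.0.as_ptr());
/// }
/// ```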
35094#[inline]
35095#[target_feature(enable = "avx512f")]
35096#[cfg_attr(test, assert_instr(vmovdqa32))]
35097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35098pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
35099    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
35100}
35101
35102/// Load packed 32-bit integers from memory into dst using zeromask k
35103/// (elements are zeroed out when the corresponding mask bit is not set).
35104/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35105///
35106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
35107#[inline]
35108#[target_feature(enable = "avx512f")]
35109#[cfg_attr(test, assert_instr(vmovdqa32))]
35110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35111pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35112    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
35113}
35114
35115/// Load packed 64-bit integers from memory into dst using writemask k
35116/// (elements are copied from src when the corresponding mask bit is not set).
35117/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35118///
35119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
35120#[inline]
35121#[target_feature(enable = "avx512f")]
35122#[cfg_attr(test, assert_instr(vmovdqa64))]
35123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35124pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
35125    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
35126}
35127
35128/// Load packed 64-bit integers from memory into dst using zeromask k
35129/// (elements are zeroed out when the corresponding mask bit is not set).
35130/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35131///
35132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
35133#[inline]
35134#[target_feature(enable = "avx512f")]
35135#[cfg_attr(test, assert_instr(vmovdqa64))]
35136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35137pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35138    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
35139}
35140
35141/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35142/// (elements are copied from src when the corresponding mask bit is not set).
35143/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35144///
35145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
35146#[inline]
35147#[target_feature(enable = "avx512f")]
35148#[cfg_attr(test, assert_instr(vmovaps))]
35149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35150pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
35151    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
35152}
35153
35154/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35155/// (elements are zeroed out when the corresponding mask bit is not set).
35156/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35157///
35158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
35159#[inline]
35160#[target_feature(enable = "avx512f")]
35161#[cfg_attr(test, assert_instr(vmovaps))]
35162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35163pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35164    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
35165}
35166
35167/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35168/// (elements are copied from src when the corresponding mask bit is not set).
35169/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35170///
35171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
35172#[inline]
35173#[target_feature(enable = "avx512f")]
35174#[cfg_attr(test, assert_instr(vmovapd))]
35175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35176pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
35177    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
35178}
35179
35180/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35181/// (elements are zeroed out when the corresponding mask bit is not set).
35182/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35183///
35184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
35185#[inline]
35186#[target_feature(enable = "avx512f")]
35187#[cfg_attr(test, assert_instr(vmovapd))]
35188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35189pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
35190    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
35191}
35192
35193/// Load packed 32-bit integers from memory into dst using writemask k
35194/// (elements are copied from src when the corresponding mask bit is not set).
35195/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35196///
35197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
35198#[inline]
35199#[target_feature(enable = "avx512f,avx512vl")]
35200#[cfg_attr(test, assert_instr(vmovdqa32))]
35201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35202pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
35203    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
35204}
35205
35206/// Load packed 32-bit integers from memory into dst using zeromask k
35207/// (elements are zeroed out when the corresponding mask bit is not set).
35208/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35209///
35210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
35211#[inline]
35212#[target_feature(enable = "avx512f,avx512vl")]
35213#[cfg_attr(test, assert_instr(vmovdqa32))]
35214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35215pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35216    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
35217}
35218
35219/// Load packed 64-bit integers from memory into dst using writemask k
35220/// (elements are copied from src when the corresponding mask bit is not set).
35221/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35222///
35223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
35224#[inline]
35225#[target_feature(enable = "avx512f,avx512vl")]
35226#[cfg_attr(test, assert_instr(vmovdqa64))]
35227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35228pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
35229    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
35230}
35231
35232/// Load packed 64-bit integers from memory into dst using zeromask k
35233/// (elements are zeroed out when the corresponding mask bit is not set).
35234/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35235///
35236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
35237#[inline]
35238#[target_feature(enable = "avx512f,avx512vl")]
35239#[cfg_attr(test, assert_instr(vmovdqa64))]
35240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35241pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35242    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
35243}
35244
35245/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35246/// (elements are copied from src when the corresponding mask bit is not set).
35247/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35248///
35249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
35250#[inline]
35251#[target_feature(enable = "avx512f,avx512vl")]
35252#[cfg_attr(test, assert_instr(vmovaps))]
35253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35254pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
35255    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
35256}
35257
35258/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35259/// (elements are zeroed out when the corresponding mask bit is not set).
35260/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35261///
35262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
35263#[inline]
35264#[target_feature(enable = "avx512f,avx512vl")]
35265#[cfg_attr(test, assert_instr(vmovaps))]
35266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35267pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
35268    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
35269}
35270
35271/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35272/// (elements are copied from src when the corresponding mask bit is not set).
35273/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35274///
35275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
35276#[inline]
35277#[target_feature(enable = "avx512f,avx512vl")]
35278#[cfg_attr(test, assert_instr(vmovapd))]
35279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35280pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
35281    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
35282}
35283
35284/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35285/// (elements are zeroed out when the corresponding mask bit is not set).
35286/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35287///
35288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
35289#[inline]
35290#[target_feature(enable = "avx512f,avx512vl")]
35291#[cfg_attr(test, assert_instr(vmovapd))]
35292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35293pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
35294    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
35295}
35296
35297/// Load packed 32-bit integers from memory into dst using writemask k
35298/// (elements are copied from src when the corresponding mask bit is not set).
35299/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35300///
35301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
35302#[inline]
35303#[target_feature(enable = "avx512f,avx512vl")]
35304#[cfg_attr(test, assert_instr(vmovdqa32))]
35305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35306pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
35307    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
35308}
35309
35310/// Load packed 32-bit integers from memory into dst using zeromask k
35311/// (elements are zeroed out when the corresponding mask bit is not set).
35312/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35313///
35314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
35315#[inline]
35316#[target_feature(enable = "avx512f,avx512vl")]
35317#[cfg_attr(test, assert_instr(vmovdqa32))]
35318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35319pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35320    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
35321}
35322
35323/// Load packed 64-bit integers from memory into dst using writemask k
35324/// (elements are copied from src when the corresponding mask bit is not set).
35325/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35326///
35327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
35328#[inline]
35329#[target_feature(enable = "avx512f,avx512vl")]
35330#[cfg_attr(test, assert_instr(vmovdqa64))]
35331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35332pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
35333    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
35334}
35335
35336/// Load packed 64-bit integers from memory into dst using zeromask k
35337/// (elements are zeroed out when the corresponding mask bit is not set).
35338/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35339///
35340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
35341#[inline]
35342#[target_feature(enable = "avx512f,avx512vl")]
35343#[cfg_attr(test, assert_instr(vmovdqa64))]
35344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35345pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35346    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
35347}
35348
35349/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35350/// (elements are copied from src when the corresponding mask bit is not set).
35351/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35352///
35353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
35354#[inline]
35355#[target_feature(enable = "avx512f,avx512vl")]
35356#[cfg_attr(test, assert_instr(vmovaps))]
35357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35358pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35359    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
35360}
35361
35362/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35363/// (elements are zeroed out when the corresponding mask bit is not set).
35364/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35365///
35366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
35367#[inline]
35368#[target_feature(enable = "avx512f,avx512vl")]
35369#[cfg_attr(test, assert_instr(vmovaps))]
35370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35371pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35372    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
35373}
35374
35375/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35376/// (elements are copied from src when the corresponding mask bit is not set).
35377/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35378///
35379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
35380#[inline]
35381#[target_feature(enable = "avx512f,avx512vl")]
35382#[cfg_attr(test, assert_instr(vmovapd))]
35383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35384pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35385    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
35386}
35387
35388/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35389/// (elements are zeroed out when the corresponding mask bit is not set).
35390/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35391///
35392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
35393#[inline]
35394#[target_feature(enable = "avx512f,avx512vl")]
35395#[cfg_attr(test, assert_instr(vmovapd))]
35396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35397pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35398    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
35399}
35400
35401/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35402/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35403/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35404/// exception may be generated.
35405///
35406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
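///
/// An illustrative sketch (assumes AVX-512F support has been verified at runtime;
/// `Aligned16` is a hypothetical wrapper used only to guarantee the 16-byte alignment):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // 16-byte aligned scalar source.
/// #[repr(align(16))]
/// struct Aligned16([f32; 4]);
///
/// let data = Aligned16([42.0, 0.0, 0.0, 0.0]);
/// unsafe {
///     let src = _mm_set1_ps(-1.0);
///     // Bit 0 of the mask is set, so lane 0 is loaded from memory;
///     // lanes 1..4 of the result are zeroed regardless of `src`.
///     let r = _mm_mask_load_ss(src, 0b1, data.0.as_ptr());
/// }
/// ```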
35407#[inline]
35408#[cfg_attr(test, assert_instr(vmovss))]
35409#[target_feature(enable = "avx512f")]
35410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35411pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35412    let mut dst: __m128 = src;
35413    asm!(
35414        vpl!("vmovss {dst}{{{k}}}"),
35415        p = in(reg) mem_addr,
35416        k = in(kreg) k,
35417        dst = inout(xmm_reg) dst,
35418        options(pure, readonly, nostack, preserves_flags),
35419    );
35420    dst
35421}
35422
35423/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35424/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
35425/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35426/// exception may be generated.
35427///
35428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
35429#[inline]
35430#[cfg_attr(test, assert_instr(vmovss))]
35431#[target_feature(enable = "avx512f")]
35432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35433pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
35434    let mut dst: __m128;
35435    asm!(
35436        vpl!("vmovss {dst}{{{k}}} {{z}}"),
35437        p = in(reg) mem_addr,
35438        k = in(kreg) k,
35439        dst = out(xmm_reg) dst,
35440        options(pure, readonly, nostack, preserves_flags),
35441    );
35442    dst
35443}
35444
35445/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35446/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35447/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35448/// exception may be generated.
35449///
35450/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
35451#[inline]
35452#[cfg_attr(test, assert_instr(vmovsd))]
35453#[target_feature(enable = "avx512f")]
35454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35455pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35456    let mut dst: __m128d = src;
35457    asm!(
35458        vpl!("vmovsd {dst}{{{k}}}"),
35459        p = in(reg) mem_addr,
35460        k = in(kreg) k,
35461        dst = inout(xmm_reg) dst,
35462        options(pure, readonly, nostack, preserves_flags),
35463    );
35464    dst
35465}
35466
35467/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35468/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
35469/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
35470/// may be generated.
35471///
35472/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
35473#[inline]
35474#[cfg_attr(test, assert_instr(vmovsd))]
35475#[target_feature(enable = "avx512f")]
35476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35477pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35478    let mut dst: __m128d;
35479    asm!(
35480        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
35481        p = in(reg) mem_addr,
35482        k = in(kreg) k,
35483        dst = out(xmm_reg) dst,
35484        options(pure, readonly, nostack, preserves_flags),
35485    );
35486    dst
35487}
35488
35489/// Store packed 32-bit integers from a into memory using writemask k.
35490/// mem_addr does not need to be aligned on any particular boundary.
35491///
35492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
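///
/// A minimal usage sketch (illustrative only; assumes the `avx512f` target feature
/// has already been verified at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut out = [0i32; 16];
/// unsafe {
///     let a = _mm512_set1_epi32(9);
///     // Only the lanes whose mask bit is set are written; the other
///     // elements of `out` are left untouched.
///     _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0b1010_1010_1010_1010, a);
/// }
/// ```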
35493#[inline]
35494#[target_feature(enable = "avx512f")]
35495#[cfg_attr(test, assert_instr(vmovdqu32))]
35496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35497pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35498    storedqu32_512(mem_addr, a.as_i32x16(), mask)
35499}
35500
35501/// Store packed 64-bit integers from a into memory using writemask k.
35502/// mem_addr does not need to be aligned on any particular boundary.
35503///
35504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
35505#[inline]
35506#[target_feature(enable = "avx512f")]
35507#[cfg_attr(test, assert_instr(vmovdqu64))]
35508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35509pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35510    storedqu64_512(mem_addr, a.as_i64x8(), mask)
35511}
35512
35513/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35514/// mem_addr does not need to be aligned on any particular boundary.
35515///
35516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
35517#[inline]
35518#[target_feature(enable = "avx512f")]
35519#[cfg_attr(test, assert_instr(vmovups))]
35520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35521pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35522    storeups_512(mem_addr, a.as_f32x16(), mask)
35523}
35524
35525/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35526/// mem_addr does not need to be aligned on any particular boundary.
35527///
35528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
35529#[inline]
35530#[target_feature(enable = "avx512f")]
35531#[cfg_attr(test, assert_instr(vmovupd))]
35532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35533pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35534    storeupd_512(mem_addr, a.as_f64x8(), mask)
35535}
35536
35537/// Store packed 32-bit integers from a into memory using writemask k.
35538/// mem_addr does not need to be aligned on any particular boundary.
35539///
35540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
35541#[inline]
35542#[target_feature(enable = "avx512f,avx512vl")]
35543#[cfg_attr(test, assert_instr(vmovdqu32))]
35544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35545pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35546    storedqu32_256(mem_addr, a.as_i32x8(), mask)
35547}
35548
35549/// Store packed 64-bit integers from a into memory using writemask k.
35550/// mem_addr does not need to be aligned on any particular boundary.
35551///
35552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
35553#[inline]
35554#[target_feature(enable = "avx512f,avx512vl")]
35555#[cfg_attr(test, assert_instr(vmovdqu64))]
35556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35557pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35558    storedqu64_256(mem_addr, a.as_i64x4(), mask)
35559}
35560
35561/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35562/// mem_addr does not need to be aligned on any particular boundary.
35563///
35564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
35565#[inline]
35566#[target_feature(enable = "avx512f,avx512vl")]
35567#[cfg_attr(test, assert_instr(vmovups))]
35568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35569pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35570    storeups_256(mem_addr, a.as_f32x8(), mask)
35571}
35572
35573/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35574/// mem_addr does not need to be aligned on any particular boundary.
35575///
35576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
35577#[inline]
35578#[target_feature(enable = "avx512f,avx512vl")]
35579#[cfg_attr(test, assert_instr(vmovupd))]
35580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35581pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35582    storeupd_256(mem_addr, a.as_f64x4(), mask)
35583}
35584
35585/// Store packed 32-bit integers from a into memory using writemask k.
35586/// mem_addr does not need to be aligned on any particular boundary.
35587///
35588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
35589#[inline]
35590#[target_feature(enable = "avx512f,avx512vl")]
35591#[cfg_attr(test, assert_instr(vmovdqu32))]
35592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35593pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35594    storedqu32_128(mem_addr, a.as_i32x4(), mask)
35595}
35596
35597/// Store packed 64-bit integers from a into memory using writemask k.
35598/// mem_addr does not need to be aligned on any particular boundary.
35599///
35600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
35601#[inline]
35602#[target_feature(enable = "avx512f,avx512vl")]
35603#[cfg_attr(test, assert_instr(vmovdqu64))]
35604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35605pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35606    storedqu64_128(mem_addr, a.as_i64x2(), mask)
35607}
35608
35609/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35610/// mem_addr does not need to be aligned on any particular boundary.
35611///
35612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
35613#[inline]
35614#[target_feature(enable = "avx512f,avx512vl")]
35615#[cfg_attr(test, assert_instr(vmovups))]
35616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35617pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35618    storeups_128(mem_addr, a.as_f32x4(), mask)
35619}
35620
35621/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35622/// mem_addr does not need to be aligned on any particular boundary.
35623///
35624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
35625#[inline]
35626#[target_feature(enable = "avx512f,avx512vl")]
35627#[cfg_attr(test, assert_instr(vmovupd))]
35628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35629pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35630    storeupd_128(mem_addr, a.as_f64x2(), mask)
35631}
35632
35633/// Store packed 32-bit integers from a into memory using writemask k.
35634/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35635///
35636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
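///
/// A minimal usage sketch of the aligned, masked store (illustrative only; assumes
/// AVX-512F support has been verified at runtime, and `Aligned64` is a hypothetical
/// wrapper used only to guarantee the 64-byte alignment):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // 64-byte aligned destination.
/// #[repr(align(64))]
/// struct Aligned64([i32; 16]);
///
/// let mut out = Aligned64([0; 16]);
/// unsafe {
///     let a = _mm512_set1_epi32(1);
///     // Write only the low eight lanes; the upper eight stay 0.
///     _mm512_mask_store_epi32(out.0.as_mut_ptr(), 0x00FF, a);
/// }
/// ```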
35637#[inline]
35638#[target_feature(enable = "avx512f")]
35639#[cfg_attr(test, assert_instr(vmovdqa32))]
35640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35641pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35642    storedqa32_512(mem_addr, a.as_i32x16(), mask)
35643}
35644
35645/// Store packed 64-bit integers from a into memory using writemask k.
35646/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35647///
35648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35649#[inline]
35650#[target_feature(enable = "avx512f")]
35651#[cfg_attr(test, assert_instr(vmovdqa64))]
35652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35653pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35654    storedqa64_512(mem_addr, a.as_i64x8(), mask)
35655}
35656
35657/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35658/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35659///
35660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35661#[inline]
35662#[target_feature(enable = "avx512f")]
35663#[cfg_attr(test, assert_instr(vmovaps))]
35664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35665pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35666    storeaps_512(mem_addr, a.as_f32x16(), mask)
35667}
35668
35669/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35670/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35671///
35672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
35673#[inline]
35674#[target_feature(enable = "avx512f")]
35675#[cfg_attr(test, assert_instr(vmovapd))]
35676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35677pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35678    storeapd_512(mem_addr, a.as_f64x8(), mask)
35679}
35680
35681/// Store packed 32-bit integers from a into memory using writemask k.
35682/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35683///
35684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
35685#[inline]
35686#[target_feature(enable = "avx512f,avx512vl")]
35687#[cfg_attr(test, assert_instr(vmovdqa32))]
35688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35689pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35690    storedqa32_256(mem_addr, a.as_i32x8(), mask)
35691}
35692
35693/// Store packed 64-bit integers from a into memory using writemask k.
35694/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35695///
35696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
35697#[inline]
35698#[target_feature(enable = "avx512f,avx512vl")]
35699#[cfg_attr(test, assert_instr(vmovdqa64))]
35700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35701pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35702    storedqa64_256(mem_addr, a.as_i64x4(), mask)
35703}
35704
35705/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35706/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35707///
35708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
35709#[inline]
35710#[target_feature(enable = "avx512f,avx512vl")]
35711#[cfg_attr(test, assert_instr(vmovaps))]
35712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35713pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35714    storeaps_256(mem_addr, a.as_f32x8(), mask)
35715}
35716
35717/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35718/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35719///
35720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
35721#[inline]
35722#[target_feature(enable = "avx512f,avx512vl")]
35723#[cfg_attr(test, assert_instr(vmovapd))]
35724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35725pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35726    storeapd_256(mem_addr, a.as_f64x4(), mask)
35727}
35728
35729/// Store packed 32-bit integers from a into memory using writemask k.
35730/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35731///
35732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
35733#[inline]
35734#[target_feature(enable = "avx512f,avx512vl")]
35735#[cfg_attr(test, assert_instr(vmovdqa32))]
35736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35737pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35738    storedqa32_128(mem_addr, a.as_i32x4(), mask)
35739}
35740
35741/// Store packed 64-bit integers from a into memory using writemask k.
35742/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35743///
35744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
35745#[inline]
35746#[target_feature(enable = "avx512f,avx512vl")]
35747#[cfg_attr(test, assert_instr(vmovdqa64))]
35748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35749pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35750    storedqa64_128(mem_addr, a.as_i64x2(), mask)
35751}
35752
35753/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35754/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35755///
35756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
35757#[inline]
35758#[target_feature(enable = "avx512f,avx512vl")]
35759#[cfg_attr(test, assert_instr(vmovaps))]
35760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35761pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35762    storeaps_128(mem_addr, a.as_f32x4(), mask)
35763}
35764
35765/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35766/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35767///
35768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
35769#[inline]
35770#[target_feature(enable = "avx512f,avx512vl")]
35771#[cfg_attr(test, assert_instr(vmovapd))]
35772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35773pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35774    storeapd_128(mem_addr, a.as_f64x2(), mask)
35775}
35776
35777/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
35778/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35779///
35780/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
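///
/// An illustrative sketch (assumes AVX-512F support has been verified at runtime;
/// `Aligned16` is a hypothetical wrapper used only to guarantee the 16-byte alignment):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // 16-byte aligned destination.
/// #[repr(align(16))]
/// struct Aligned16([f32; 4]);
///
/// let mut out = Aligned16([0.0; 4]);
/// unsafe {
///     let a = _mm_set1_ps(7.0);
///     // Bit 0 of the mask is set, so the lowest element of `a` is stored.
///     _mm_mask_store_ss(out.0.as_mut_ptr(), 0b1, a);
/// }
/// ```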
35781#[inline]
35782#[cfg_attr(test, assert_instr(vmovss))]
35783#[target_feature(enable = "avx512f")]
35784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35785pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
35786    asm!(
35787        vps!("vmovss", "{{{k}}}, {a}"),
35788        p = in(reg) mem_addr,
35789        k = in(kreg) k,
35790        a = in(xmm_reg) a,
35791        options(nostack, preserves_flags),
35792    );
35793}
35794
35795/// Store the lower double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
35796/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35797///
35798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_sd)
35799#[inline]
35800#[cfg_attr(test, assert_instr(vmovsd))]
35801#[target_feature(enable = "avx512f")]
35802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35803pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
35804    asm!(
35805        vps!("vmovsd", "{{{k}}}, {a}"),
35806        p = in(reg) mem_addr,
35807        k = in(kreg) k,
35808        a = in(xmm_reg) a,
35809        options(nostack, preserves_flags),
35810    );
35811}
35812
35813/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35814///
35815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
35816#[inline]
35817#[target_feature(enable = "avx512f")]
35818#[cfg_attr(test, assert_instr(vpexpandd))]
35819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35820pub unsafe fn _mm512_mask_expandloadu_epi32(
35821    src: __m512i,
35822    k: __mmask16,
35823    mem_addr: *const i32,
35824) -> __m512i {
35825    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
35826}
35827
35828/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
35831#[inline]
35832#[target_feature(enable = "avx512f")]
35833#[cfg_attr(test, assert_instr(vpexpandd))]
35834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35835pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35836    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
35837}
35838
35839/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35840///
35841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
35842#[inline]
35843#[target_feature(enable = "avx512f,avx512vl")]
35844#[cfg_attr(test, assert_instr(vpexpandd))]
35845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35846pub unsafe fn _mm256_mask_expandloadu_epi32(
35847    src: __m256i,
35848    k: __mmask8,
35849    mem_addr: *const i32,
35850) -> __m256i {
35851    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
35852}
35853
35854/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35855///
35856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
35857#[inline]
35858#[target_feature(enable = "avx512f,avx512vl")]
35859#[cfg_attr(test, assert_instr(vpexpandd))]
35860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35861pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35862    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
35863}
35864
35865/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35866///
35867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
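///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation), assuming
/// run-time feature detection. The loaded values are read contiguously from memory and
/// "expanded" into the lanes whose mask bit is set; the remaining lanes come from `src`.
///
/// ```
/// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let mem = [100i32, 200, 300, 400];
///         let src = _mm_set1_epi32(-1);
///         // Lanes 1 and 3 are active, so they receive mem[0] and mem[1] in order.
///         let r = _mm_mask_expandloadu_epi32(src, 0b1010, mem.as_ptr());
///         let got: [i32; 4] = core::mem::transmute(r);
///         assert_eq!(got, [-1, 100, -1, 200]);
///     }
/// }
/// ```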
35868#[inline]
35869#[target_feature(enable = "avx512f,avx512vl")]
35870#[cfg_attr(test, assert_instr(vpexpandd))]
35871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35872pub unsafe fn _mm_mask_expandloadu_epi32(
35873    src: __m128i,
35874    k: __mmask8,
35875    mem_addr: *const i32,
35876) -> __m128i {
35877    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
35878}
35879
35880/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35881///
35882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
35883#[inline]
35884#[target_feature(enable = "avx512f,avx512vl")]
35885#[cfg_attr(test, assert_instr(vpexpandd))]
35886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35887pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35888    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
35889}
35890
35891/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35892///
35893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
35894#[inline]
35895#[target_feature(enable = "avx512f")]
35896#[cfg_attr(test, assert_instr(vpexpandq))]
35897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35898pub unsafe fn _mm512_mask_expandloadu_epi64(
35899    src: __m512i,
35900    k: __mmask8,
35901    mem_addr: *const i64,
35902) -> __m512i {
35903    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
35904}
35905
35906/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35907///
35908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
35909#[inline]
35910#[target_feature(enable = "avx512f")]
35911#[cfg_attr(test, assert_instr(vpexpandq))]
35912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35913pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35914    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
35915}
35916
35917/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35918///
35919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
35920#[inline]
35921#[target_feature(enable = "avx512f,avx512vl")]
35922#[cfg_attr(test, assert_instr(vpexpandq))]
35923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35924pub unsafe fn _mm256_mask_expandloadu_epi64(
35925    src: __m256i,
35926    k: __mmask8,
35927    mem_addr: *const i64,
35928) -> __m256i {
35929    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
35930}
35931
35932/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35933///
35934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
35935#[inline]
35936#[target_feature(enable = "avx512f,avx512vl")]
35937#[cfg_attr(test, assert_instr(vpexpandq))]
35938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35939pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35940    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
35941}
35942
35943/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35944///
35945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
35946#[inline]
35947#[target_feature(enable = "avx512f,avx512vl")]
35948#[cfg_attr(test, assert_instr(vpexpandq))]
35949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35950pub unsafe fn _mm_mask_expandloadu_epi64(
35951    src: __m128i,
35952    k: __mmask8,
35953    mem_addr: *const i64,
35954) -> __m128i {
35955    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
35956}
35957
35958/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35959///
35960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
35961#[inline]
35962#[target_feature(enable = "avx512f,avx512vl")]
35963#[cfg_attr(test, assert_instr(vpexpandq))]
35964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35965pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35966    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
35967}
35968
35969/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35970///
35971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
35972#[inline]
35973#[target_feature(enable = "avx512f")]
35974#[cfg_attr(test, assert_instr(vexpandps))]
35975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35976pub unsafe fn _mm512_mask_expandloadu_ps(
35977    src: __m512,
35978    k: __mmask16,
35979    mem_addr: *const f32,
35980) -> __m512 {
35981    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
35982}
35983
35984/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35985///
35986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
35987#[inline]
35988#[target_feature(enable = "avx512f")]
35989#[cfg_attr(test, assert_instr(vexpandps))]
35990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35991pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35992    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
35993}
35994
35995/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35996///
35997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
35998#[inline]
35999#[target_feature(enable = "avx512f,avx512vl")]
36000#[cfg_attr(test, assert_instr(vexpandps))]
36001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36002pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
36003    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
36004}
36005
36006/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36007///
36008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
36009#[inline]
36010#[target_feature(enable = "avx512f,avx512vl")]
36011#[cfg_attr(test, assert_instr(vexpandps))]
36012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36013pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
36014    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
36015}
36016
36017/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36018///
36019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
36020#[inline]
36021#[target_feature(enable = "avx512f,avx512vl")]
36022#[cfg_attr(test, assert_instr(vexpandps))]
36023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36024pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
36025    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
36026}
36027
36028/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36029///
36030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
36031#[inline]
36032#[target_feature(enable = "avx512f,avx512vl")]
36033#[cfg_attr(test, assert_instr(vexpandps))]
36034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36035pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
36036    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
36037}
36038
36039/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36040///
36041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
36042#[inline]
36043#[target_feature(enable = "avx512f")]
36044#[cfg_attr(test, assert_instr(vexpandpd))]
36045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36046pub unsafe fn _mm512_mask_expandloadu_pd(
36047    src: __m512d,
36048    k: __mmask8,
36049    mem_addr: *const f64,
36050) -> __m512d {
36051    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
36052}
36053
36054/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36055///
36056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
36057#[inline]
36058#[target_feature(enable = "avx512f")]
36059#[cfg_attr(test, assert_instr(vexpandpd))]
36060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36061pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
36062    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
36063}
36064
36065/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36066///
36067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
36068#[inline]
36069#[target_feature(enable = "avx512f,avx512vl")]
36070#[cfg_attr(test, assert_instr(vexpandpd))]
36071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36072pub unsafe fn _mm256_mask_expandloadu_pd(
36073    src: __m256d,
36074    k: __mmask8,
36075    mem_addr: *const f64,
36076) -> __m256d {
36077    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
36078}
36079
36080/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36081///
36082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
36083#[inline]
36084#[target_feature(enable = "avx512f,avx512vl")]
36085#[cfg_attr(test, assert_instr(vexpandpd))]
36086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36087pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
36088    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
36089}
36090
36091/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36092///
36093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
36094#[inline]
36095#[target_feature(enable = "avx512f,avx512vl")]
36096#[cfg_attr(test, assert_instr(vexpandpd))]
36097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36098pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
36099    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
36100}
36101
36102/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36103///
36104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
36105#[inline]
36106#[target_feature(enable = "avx512f,avx512vl")]
36107#[cfg_attr(test, assert_instr(vexpandpd))]
36108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36109pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
36110    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
36111}
36112
36113/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
36114///
36115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
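///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation), assuming
/// run-time feature detection: `setr` places the arguments in memory order, i.e. `e0`
/// becomes element 0.
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let v = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///         let got: [f64; 8] = core::mem::transmute(v);
///         assert_eq!(got, [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
///     }
/// }
/// ```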
36116#[inline]
36117#[target_feature(enable = "avx512f")]
36118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36119pub fn _mm512_setr_pd(
36120    e0: f64,
36121    e1: f64,
36122    e2: f64,
36123    e3: f64,
36124    e4: f64,
36125    e5: f64,
36126    e6: f64,
36127    e7: f64,
36128) -> __m512d {
36129    unsafe {
36130        let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
36131        transmute(r)
36132    }
36133}
36134
36135/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
36136///
36137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
36138#[inline]
36139#[target_feature(enable = "avx512f")]
36140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36141pub fn _mm512_set_pd(
36142    e0: f64,
36143    e1: f64,
36144    e2: f64,
36145    e3: f64,
36146    e4: f64,
36147    e5: f64,
36148    e6: f64,
36149    e7: f64,
36150) -> __m512d {
36151    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
36152}
36153
36154/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36155///
36156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
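///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation), assuming
/// run-time feature detection: lane 0 comes from `b` when mask bit 0 is set and from `src`
/// otherwise, while lanes 1..=3 always come from `a`.
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let src = _mm_set1_ps(9.0);
///         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
///         let b = _mm_set1_ps(5.0);
///         let taken: [f32; 4] = core::mem::transmute(_mm_mask_move_ss(src, 0b1, a, b));
///         let kept: [f32; 4] = core::mem::transmute(_mm_mask_move_ss(src, 0b0, a, b));
///         assert_eq!(taken, [5.0, 2.0, 3.0, 4.0]);
///         assert_eq!(kept, [9.0, 2.0, 3.0, 4.0]);
///     }
/// }
/// ```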
36157#[inline]
36158#[target_feature(enable = "avx512f")]
36159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36160#[cfg_attr(test, assert_instr(vmovss))]
36161pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36162    unsafe {
36163        let extractsrc: f32 = simd_extract!(src, 0);
36164        let mut mov: f32 = extractsrc;
36165        if (k & 0b00000001) != 0 {
36166            mov = simd_extract!(b, 0);
36167        }
36168        simd_insert!(a, 0, mov)
36169    }
36170}
36171
36172/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36173///
36174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
36175#[inline]
36176#[target_feature(enable = "avx512f")]
36177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36178#[cfg_attr(test, assert_instr(vmovss))]
36179pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36180    unsafe {
36181        let mut mov: f32 = 0.;
36182        if (k & 0b00000001) != 0 {
36183            mov = simd_extract!(b, 0);
36184        }
36185        simd_insert!(a, 0, mov)
36186    }
36187}
36188
36189/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36190///
36191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
36192#[inline]
36193#[target_feature(enable = "avx512f")]
36194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36195#[cfg_attr(test, assert_instr(vmovsd))]
36196pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36197    unsafe {
36198        let extractsrc: f64 = simd_extract!(src, 0);
36199        let mut mov: f64 = extractsrc;
36200        if (k & 0b00000001) != 0 {
36201            mov = simd_extract!(b, 0);
36202        }
36203        simd_insert!(a, 0, mov)
36204    }
36205}
36206
36207/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36208///
36209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
36210#[inline]
36211#[target_feature(enable = "avx512f")]
36212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36213#[cfg_attr(test, assert_instr(vmovsd))]
36214pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36215    unsafe {
36216        let mut mov: f64 = 0.;
36217        if (k & 0b00000001) != 0 {
36218            mov = simd_extract!(b, 0);
36219        }
36220        simd_insert!(a, 0, mov)
36221    }
36222}
36223
36224/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36225///
36226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
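///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation), assuming
/// run-time feature detection: only lane 0 is computed, and it falls back to `src` when the
/// mask bit is clear.
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let src = _mm_set1_ps(100.0);
///         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
///         let b = _mm_set1_ps(10.0);
///         // Mask bit 0 set: lane 0 is 1.0 + 10.0; the rest is copied from `a`.
///         let r: [f32; 4] = core::mem::transmute(_mm_mask_add_ss(src, 0b1, a, b));
///         assert_eq!(r, [11.0, 2.0, 3.0, 4.0]);
///         // Mask bit 0 clear: lane 0 is taken from `src`.
///         let r: [f32; 4] = core::mem::transmute(_mm_mask_add_ss(src, 0b0, a, b));
///         assert_eq!(r, [100.0, 2.0, 3.0, 4.0]);
///     }
/// }
/// ```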
36227#[inline]
36228#[target_feature(enable = "avx512f")]
36229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36230#[cfg_attr(test, assert_instr(vaddss))]
36231pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36232    unsafe {
36233        let extractsrc: f32 = simd_extract!(src, 0);
36234        let mut add: f32 = extractsrc;
36235        if (k & 0b00000001) != 0 {
36236            let extracta: f32 = simd_extract!(a, 0);
36237            let extractb: f32 = simd_extract!(b, 0);
36238            add = extracta + extractb;
36239        }
36240        simd_insert!(a, 0, add)
36241    }
36242}
36243
36244/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36245///
36246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
36247#[inline]
36248#[target_feature(enable = "avx512f")]
36249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36250#[cfg_attr(test, assert_instr(vaddss))]
36251pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36252    unsafe {
36253        let mut add: f32 = 0.;
36254        if (k & 0b00000001) != 0 {
36255            let extracta: f32 = simd_extract!(a, 0);
36256            let extractb: f32 = simd_extract!(b, 0);
36257            add = extracta + extractb;
36258        }
36259        simd_insert!(a, 0, add)
36260    }
36261}
36262
36263/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36264///
36265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
36266#[inline]
36267#[target_feature(enable = "avx512f")]
36268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36269#[cfg_attr(test, assert_instr(vaddsd))]
36270pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36271    unsafe {
36272        let extractsrc: f64 = simd_extract!(src, 0);
36273        let mut add: f64 = extractsrc;
36274        if (k & 0b00000001) != 0 {
36275            let extracta: f64 = simd_extract!(a, 0);
36276            let extractb: f64 = simd_extract!(b, 0);
36277            add = extracta + extractb;
36278        }
36279        simd_insert!(a, 0, add)
36280    }
36281}
36282
36283/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36284///
36285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
36286#[inline]
36287#[target_feature(enable = "avx512f")]
36288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36289#[cfg_attr(test, assert_instr(vaddsd))]
36290pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36291    unsafe {
36292        let mut add: f64 = 0.;
36293        if (k & 0b00000001) != 0 {
36294            let extracta: f64 = simd_extract!(a, 0);
36295            let extractb: f64 = simd_extract!(b, 0);
36296            add = extracta + extractb;
36297        }
36298        simd_insert!(a, 0, add)
36299    }
36300}
36301
36302/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36308#[cfg_attr(test, assert_instr(vsubss))]
36309pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36310    unsafe {
36311        let extractsrc: f32 = simd_extract!(src, 0);
36312        let mut add: f32 = extractsrc;
36313        if (k & 0b00000001) != 0 {
36314            let extracta: f32 = simd_extract!(a, 0);
36315            let extractb: f32 = simd_extract!(b, 0);
36316            add = extracta - extractb;
36317        }
36318        simd_insert!(a, 0, add)
36319    }
36320}
36321
36322/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36323///
36324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
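///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation), assuming
/// run-time feature detection: the zeromask variant zeroes lane 0 instead of copying it from
/// a source vector when the mask bit is clear.
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_ps(8.0, 2.0, 3.0, 4.0);
///         let b = _mm_set1_ps(5.0);
///         let r: [f32; 4] = core::mem::transmute(_mm_maskz_sub_ss(0b1, a, b));
///         assert_eq!(r, [3.0, 2.0, 3.0, 4.0]);
///         // With mask bit 0 clear the lower lane is zeroed.
///         let r: [f32; 4] = core::mem::transmute(_mm_maskz_sub_ss(0b0, a, b));
///         assert_eq!(r, [0.0, 2.0, 3.0, 4.0]);
///     }
/// }
/// ```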
36325#[inline]
36326#[target_feature(enable = "avx512f")]
36327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36328#[cfg_attr(test, assert_instr(vsubss))]
36329pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36330    unsafe {
36331        let mut add: f32 = 0.;
36332        if (k & 0b00000001) != 0 {
36333            let extracta: f32 = simd_extract!(a, 0);
36334            let extractb: f32 = simd_extract!(b, 0);
36335            add = extracta - extractb;
36336        }
36337        simd_insert!(a, 0, add)
36338    }
36339}
36340
36341/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36342///
36343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
36344#[inline]
36345#[target_feature(enable = "avx512f")]
36346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36347#[cfg_attr(test, assert_instr(vsubsd))]
36348pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36349    unsafe {
36350        let extractsrc: f64 = simd_extract!(src, 0);
36351        let mut add: f64 = extractsrc;
36352        if (k & 0b00000001) != 0 {
36353            let extracta: f64 = simd_extract!(a, 0);
36354            let extractb: f64 = simd_extract!(b, 0);
36355            add = extracta - extractb;
36356        }
36357        simd_insert!(a, 0, add)
36358    }
36359}
36360
36361/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36362///
36363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
36364#[inline]
36365#[target_feature(enable = "avx512f")]
36366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36367#[cfg_attr(test, assert_instr(vsubsd))]
36368pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36369    unsafe {
36370        let mut add: f64 = 0.;
36371        if (k & 0b00000001) != 0 {
36372            let extracta: f64 = simd_extract!(a, 0);
36373            let extractb: f64 = simd_extract!(b, 0);
36374            add = extracta - extractb;
36375        }
36376        simd_insert!(a, 0, add)
36377    }
36378}
36379
36380/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36381///
36382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
36383#[inline]
36384#[target_feature(enable = "avx512f")]
36385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36386#[cfg_attr(test, assert_instr(vmulss))]
36387pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36388    unsafe {
36389        let extractsrc: f32 = simd_extract!(src, 0);
36390        let mut add: f32 = extractsrc;
36391        if (k & 0b00000001) != 0 {
36392            let extracta: f32 = simd_extract!(a, 0);
36393            let extractb: f32 = simd_extract!(b, 0);
36394            add = extracta * extractb;
36395        }
36396        simd_insert!(a, 0, add)
36397    }
36398}
36399
36400/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36401///
36402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
36403#[inline]
36404#[target_feature(enable = "avx512f")]
36405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36406#[cfg_attr(test, assert_instr(vmulss))]
36407pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36408    unsafe {
36409        let mut add: f32 = 0.;
36410        if (k & 0b00000001) != 0 {
36411            let extracta: f32 = simd_extract!(a, 0);
36412            let extractb: f32 = simd_extract!(b, 0);
36413            add = extracta * extractb;
36414        }
36415        simd_insert!(a, 0, add)
36416    }
36417}
36418
36419/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36420///
36421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
36422#[inline]
36423#[target_feature(enable = "avx512f")]
36424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36425#[cfg_attr(test, assert_instr(vmulsd))]
36426pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36427    unsafe {
36428        let extractsrc: f64 = simd_extract!(src, 0);
36429        let mut add: f64 = extractsrc;
36430        if (k & 0b00000001) != 0 {
36431            let extracta: f64 = simd_extract!(a, 0);
36432            let extractb: f64 = simd_extract!(b, 0);
36433            add = extracta * extractb;
36434        }
36435        simd_insert!(a, 0, add)
36436    }
36437}
36438
36439/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36440///
36441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
36442#[inline]
36443#[target_feature(enable = "avx512f")]
36444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36445#[cfg_attr(test, assert_instr(vmulsd))]
36446pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36447    unsafe {
36448        let mut add: f64 = 0.;
36449        if (k & 0b00000001) != 0 {
36450            let extracta: f64 = simd_extract!(a, 0);
36451            let extractb: f64 = simd_extract!(b, 0);
36452            add = extracta * extractb;
36453        }
36454        simd_insert!(a, 0, add)
36455    }
36456}
36457
36458/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36459///
36460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
36461#[inline]
36462#[target_feature(enable = "avx512f")]
36463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36464#[cfg_attr(test, assert_instr(vdivss))]
36465pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36466    unsafe {
36467        let extractsrc: f32 = simd_extract!(src, 0);
36468        let mut add: f32 = extractsrc;
36469        if (k & 0b00000001) != 0 {
36470            let extracta: f32 = simd_extract!(a, 0);
36471            let extractb: f32 = simd_extract!(b, 0);
36472            add = extracta / extractb;
36473        }
36474        simd_insert!(a, 0, add)
36475    }
36476}
36477
36478/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36479///
36480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
36481#[inline]
36482#[target_feature(enable = "avx512f")]
36483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36484#[cfg_attr(test, assert_instr(vdivss))]
36485pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36486    unsafe {
36487        let mut add: f32 = 0.;
36488        if (k & 0b00000001) != 0 {
36489            let extracta: f32 = simd_extract!(a, 0);
36490            let extractb: f32 = simd_extract!(b, 0);
36491            add = extracta / extractb;
36492        }
36493        simd_insert!(a, 0, add)
36494    }
36495}
36496
36497/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36498///
36499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
36500#[inline]
36501#[target_feature(enable = "avx512f")]
36502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36503#[cfg_attr(test, assert_instr(vdivsd))]
36504pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36505    unsafe {
36506        let extractsrc: f64 = simd_extract!(src, 0);
36507        let mut add: f64 = extractsrc;
36508        if (k & 0b00000001) != 0 {
36509            let extracta: f64 = simd_extract!(a, 0);
36510            let extractb: f64 = simd_extract!(b, 0);
36511            add = extracta / extractb;
36512        }
36513        simd_insert!(a, 0, add)
36514    }
36515}
36516
36517/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36518///
36519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
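///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation), assuming
/// run-time feature detection: lane 0 holds `a[0] / b[0]` when the mask bit is set, and the
/// upper lane is copied from `a`.
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_pd(1.0, 5.0);
///         let b = _mm_set1_pd(4.0);
///         let r: [f64; 2] = core::mem::transmute(_mm_maskz_div_sd(0b1, a, b));
///         assert_eq!(r, [0.25, 5.0]);
///     }
/// }
/// ```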
36520#[inline]
36521#[target_feature(enable = "avx512f")]
36522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36523#[cfg_attr(test, assert_instr(vdivsd))]
36524pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36525    unsafe {
36526        let mut add: f64 = 0.;
36527        if (k & 0b00000001) != 0 {
36528            let extracta: f64 = simd_extract!(a, 0);
36529            let extractb: f64 = simd_extract!(b, 0);
36530            add = extracta / extractb;
36531        }
36532        simd_insert!(a, 0, add)
36533    }
36534}
36535
36536/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36537///
36538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
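///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation), assuming
/// run-time feature detection: lane 0 holds the maximum of the two lower elements, and the
/// upper lanes are copied from `a`.
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let src = _mm_set1_ps(-1.0);
///         let a = _mm_setr_ps(2.0, 20.0, 30.0, 40.0);
///         let b = _mm_set1_ps(7.0);
///         let r: [f32; 4] = core::mem::transmute(_mm_mask_max_ss(src, 0b1, a, b));
///         assert_eq!(r, [7.0, 20.0, 30.0, 40.0]);
///     }
/// }
/// ```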
36539#[inline]
36540#[target_feature(enable = "avx512f")]
36541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36542#[cfg_attr(test, assert_instr(vmaxss))]
36543pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36544    unsafe {
36545        transmute(vmaxss(
36546            a.as_f32x4(),
36547            b.as_f32x4(),
36548            src.as_f32x4(),
36549            k,
36550            _MM_FROUND_CUR_DIRECTION,
36551        ))
36552    }
36553}
36554
36555/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36556///
36557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
36558#[inline]
36559#[target_feature(enable = "avx512f")]
36560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36561#[cfg_attr(test, assert_instr(vmaxss))]
36562pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36563    unsafe {
36564        transmute(vmaxss(
36565            a.as_f32x4(),
36566            b.as_f32x4(),
36567            f32x4::ZERO,
36568            k,
36569            _MM_FROUND_CUR_DIRECTION,
36570        ))
36571    }
36572}
36573
36574/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36575///
36576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
36577#[inline]
36578#[target_feature(enable = "avx512f")]
36579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36580#[cfg_attr(test, assert_instr(vmaxsd))]
36581pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36582    unsafe {
36583        transmute(vmaxsd(
36584            a.as_f64x2(),
36585            b.as_f64x2(),
36586            src.as_f64x2(),
36587            k,
36588            _MM_FROUND_CUR_DIRECTION,
36589        ))
36590    }
36591}
36592
36593/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36594///
36595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
36596#[inline]
36597#[target_feature(enable = "avx512f")]
36598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36599#[cfg_attr(test, assert_instr(vmaxsd))]
36600pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36601    unsafe {
36602        transmute(vmaxsd(
36603            a.as_f64x2(),
36604            b.as_f64x2(),
36605            f64x2::ZERO,
36606            k,
36607            _MM_FROUND_CUR_DIRECTION,
36608        ))
36609    }
36610}
36611
36612/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36613///
36614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
36615#[inline]
36616#[target_feature(enable = "avx512f")]
36617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36618#[cfg_attr(test, assert_instr(vminss))]
36619pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36620    unsafe {
36621        transmute(vminss(
36622            a.as_f32x4(),
36623            b.as_f32x4(),
36624            src.as_f32x4(),
36625            k,
36626            _MM_FROUND_CUR_DIRECTION,
36627        ))
36628    }
36629}
36630
36631/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36632///
36633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
36634#[inline]
36635#[target_feature(enable = "avx512f")]
36636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36637#[cfg_attr(test, assert_instr(vminss))]
36638pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36639    unsafe {
36640        transmute(vminss(
36641            a.as_f32x4(),
36642            b.as_f32x4(),
36643            f32x4::ZERO,
36644            k,
36645            _MM_FROUND_CUR_DIRECTION,
36646        ))
36647    }
36648}
36649
36650/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36651///
36652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
36653#[inline]
36654#[target_feature(enable = "avx512f")]
36655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36656#[cfg_attr(test, assert_instr(vminsd))]
36657pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36658    unsafe {
36659        transmute(vminsd(
36660            a.as_f64x2(),
36661            b.as_f64x2(),
36662            src.as_f64x2(),
36663            k,
36664            _MM_FROUND_CUR_DIRECTION,
36665        ))
36666    }
36667}
36668
36669/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36670///
36671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
36672#[inline]
36673#[target_feature(enable = "avx512f")]
36674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36675#[cfg_attr(test, assert_instr(vminsd))]
36676pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36677    unsafe {
36678        transmute(vminsd(
36679            a.as_f64x2(),
36680            b.as_f64x2(),
36681            f64x2::ZERO,
36682            k,
36683            _MM_FROUND_CUR_DIRECTION,
36684        ))
36685    }
36686}
36687
36688/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36689///
36690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
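///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation), assuming
/// run-time feature detection: note that the square root is taken from `b`'s lower element,
/// while the upper lanes come from `a`.
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let src = _mm_set1_ps(-1.0);
///         let a = _mm_setr_ps(0.0, 2.0, 3.0, 4.0);
///         let b = _mm_set1_ps(9.0);
///         let r: [f32; 4] = core::mem::transmute(_mm_mask_sqrt_ss(src, 0b1, a, b));
///         assert_eq!(r, [3.0, 2.0, 3.0, 4.0]);
///     }
/// }
/// ```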
36691#[inline]
36692#[target_feature(enable = "avx512f")]
36693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36694#[cfg_attr(test, assert_instr(vsqrtss))]
36695pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36696    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36697}
36698
36699/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36700///
36701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
36702#[inline]
36703#[target_feature(enable = "avx512f")]
36704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36705#[cfg_attr(test, assert_instr(vsqrtss))]
36706pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36707    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
36708}
36709
36710/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36711///
36712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
36713#[inline]
36714#[target_feature(enable = "avx512f")]
36715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36716#[cfg_attr(test, assert_instr(vsqrtsd))]
36717pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36718    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36719}
36720
36721/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36722///
36723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
36724#[inline]
36725#[target_feature(enable = "avx512f")]
36726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36727#[cfg_attr(test, assert_instr(vsqrtsd))]
36728pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36729    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
36730}
36731
36732/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36733///
36734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
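///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation), assuming
/// run-time feature detection: the result is only an approximation, so it is checked against
/// the documented relative-error bound rather than for exact equality.
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_set1_ps(1.0);
///         let b = _mm_set1_ps(4.0);
///         let approx = _mm_cvtss_f32(_mm_rsqrt14_ss(a, b));
///         // Approximates 1.0 / sqrt(4.0) = 0.5 to within a relative error of 2^-14.
///         assert!((approx - 0.5).abs() <= 0.5 * 2.0f32.powi(-14));
///     }
/// }
/// ```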
36735#[inline]
36736#[target_feature(enable = "avx512f")]
36737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36738#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36739pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
36740    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36741}
36742
36743/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36744///
36745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
36746#[inline]
36747#[target_feature(enable = "avx512f")]
36748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36749#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36750pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36751    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36752}
36753
36754/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36755///
36756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
36757#[inline]
36758#[target_feature(enable = "avx512f")]
36759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36760#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36761pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36762    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36763}
36764
36765/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36766///
36767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
36768#[inline]
36769#[target_feature(enable = "avx512f")]
36770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36771#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36772pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
36773    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36774}
36775
36776/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36777///
36778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
36779#[inline]
36780#[target_feature(enable = "avx512f")]
36781#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36782#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36783pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36784    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36785}
36786
36787/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36788///
36789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
36790#[inline]
36791#[target_feature(enable = "avx512f")]
36792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36793#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36794pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36795    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36796}
36797
36798/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36799///
36800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
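///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection and the SSE helpers `_mm_set_ss`/`_mm_cvtss_f32` for setup and inspection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_ss(0.0);
///             let b = _mm_set_ss(8.0);
///             // Lower lane: approximately 1/8.0 = 0.125; upper lanes copied from `a`.
///             let r = _mm_rcp14_ss(a, b);
///             assert!((_mm_cvtss_f32(r) - 0.125).abs() < 1e-3);
///         }
///     }
/// }
/// ```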
36801#[inline]
36802#[target_feature(enable = "avx512f")]
36803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36804#[cfg_attr(test, assert_instr(vrcp14ss))]
36805pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
36806    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36807}
36808
36809/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36810///
36811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
36812#[inline]
36813#[target_feature(enable = "avx512f")]
36814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36815#[cfg_attr(test, assert_instr(vrcp14ss))]
36816pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36817    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36818}
36819
36820/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36821///
36822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
36823#[inline]
36824#[target_feature(enable = "avx512f")]
36825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36826#[cfg_attr(test, assert_instr(vrcp14ss))]
36827pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36828    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36829}
36830
36831/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36832///
36833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
36834#[inline]
36835#[target_feature(enable = "avx512f")]
36836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36837#[cfg_attr(test, assert_instr(vrcp14sd))]
36838pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
36839    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36840}
36841
36842/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36843///
36844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
36845#[inline]
36846#[target_feature(enable = "avx512f")]
36847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36848#[cfg_attr(test, assert_instr(vrcp14sd))]
36849pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36850    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36851}
36852
36853/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36854///
36855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
36856#[inline]
36857#[target_feature(enable = "avx512f")]
36858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36859#[cfg_attr(test, assert_instr(vrcp14sd))]
36860pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36861    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36862}
36863
36864/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36865///
36866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
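///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection; the SSE helpers used for setup and inspection are not defined in this file:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_ss(0.0);
///             let b = _mm_set_ss(8.0);
///             // floor(log2(8.0)) = 3, returned as a float in the lower lane.
///             let r = _mm_getexp_ss(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 3.0);
///         }
///     }
/// }
/// ```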
36867#[inline]
36868#[target_feature(enable = "avx512f")]
36869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36870#[cfg_attr(test, assert_instr(vgetexpss))]
36871pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
36872    unsafe {
36873        transmute(vgetexpss(
36874            a.as_f32x4(),
36875            b.as_f32x4(),
36876            f32x4::ZERO,
36877            0b1,
36878            _MM_FROUND_NO_EXC,
36879        ))
36880    }
36881}
36882
36883/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36884///
36885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
36886#[inline]
36887#[target_feature(enable = "avx512f")]
36888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36889#[cfg_attr(test, assert_instr(vgetexpss))]
36890pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36891    unsafe {
36892        transmute(vgetexpss(
36893            a.as_f32x4(),
36894            b.as_f32x4(),
36895            src.as_f32x4(),
36896            k,
36897            _MM_FROUND_NO_EXC,
36898        ))
36899    }
36900}
36901
36902/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36903///
36904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
36905#[inline]
36906#[target_feature(enable = "avx512f")]
36907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36908#[cfg_attr(test, assert_instr(vgetexpss))]
36909pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36910    unsafe {
36911        transmute(vgetexpss(
36912            a.as_f32x4(),
36913            b.as_f32x4(),
36914            f32x4::ZERO,
36915            k,
36916            _MM_FROUND_NO_EXC,
36917        ))
36918    }
36919}
36920
36921/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36922///
36923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
36924#[inline]
36925#[target_feature(enable = "avx512f")]
36926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36927#[cfg_attr(test, assert_instr(vgetexpsd))]
36928pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
36929    unsafe {
36930        transmute(vgetexpsd(
36931            a.as_f64x2(),
36932            b.as_f64x2(),
36933            f64x2::ZERO,
36934            0b1,
36935            _MM_FROUND_NO_EXC,
36936        ))
36937    }
36938}
36939
36940/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36941///
36942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
36943#[inline]
36944#[target_feature(enable = "avx512f")]
36945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36946#[cfg_attr(test, assert_instr(vgetexpsd))]
36947pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36948    unsafe {
36949        transmute(vgetexpsd(
36950            a.as_f64x2(),
36951            b.as_f64x2(),
36952            src.as_f64x2(),
36953            k,
36954            _MM_FROUND_NO_EXC,
36955        ))
36956    }
36957}
36958
36959/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36960///
36961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
36962#[inline]
36963#[target_feature(enable = "avx512f")]
36964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36965#[cfg_attr(test, assert_instr(vgetexpsd))]
36966pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36967    unsafe {
36968        transmute(vgetexpsd(
36969            a.as_f64x2(),
36970            b.as_f64x2(),
36971            f64x2::ZERO,
36972            k,
36973            _MM_FROUND_NO_EXC,
36974        ))
36975    }
36976}
36977
36978/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36979/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36980///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36981///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36982///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36983///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36984/// The sign is determined by sc which can take the following values:\
36985///    _MM_MANT_SIGN_src     // sign = sign(src)\
36986///    _MM_MANT_SIGN_zero    // sign = 0\
36987///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36988/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter of the `_round` variant of this intrinsic.
36989///
36990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
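///
/// An illustrative sketch (not from the upstream docs). The raw const arguments `0, 0` correspond
/// to the `_MM_MANT_NORM_1_2` interval and the source-sign option listed above; runtime `avx512f`
/// detection and the SSE setup helpers are assumptions of the example:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_ss(0.0);
///             let b = _mm_set_ss(10.0);
///             // 10.0 = 1.25 * 2^3, so the mantissa normalized to [1, 2) is 1.25.
///             let r = _mm_getmant_ss::<0, 0>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 1.25);
///         }
///     }
/// }
/// ```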
36991#[inline]
36992#[target_feature(enable = "avx512f")]
36993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36994#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36995#[rustc_legacy_const_generics(2, 3)]
36996pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
36997    a: __m128,
36998    b: __m128,
36999) -> __m128 {
37000    unsafe {
37001        static_assert_uimm_bits!(NORM, 4);
37002        static_assert_uimm_bits!(SIGN, 2);
37003        let a = a.as_f32x4();
37004        let b = b.as_f32x4();
37005        let r = vgetmantss(
37006            a,
37007            b,
37008            SIGN << 2 | NORM,
37009            f32x4::ZERO,
37010            0b1,
37011            _MM_FROUND_CUR_DIRECTION,
37012        );
37013        transmute(r)
37014    }
37015}
37016
37017/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37018/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37019///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37020///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37021///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37022///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37023/// The sign is determined by sc which can take the following values:\
37024///    _MM_MANT_SIGN_src     // sign = sign(src)\
37025///    _MM_MANT_SIGN_zero    // sign = 0\
37026///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37027/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter of the `_round` variant of this intrinsic.
37028///
37029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
37030#[inline]
37031#[target_feature(enable = "avx512f")]
37032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37033#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
37034#[rustc_legacy_const_generics(4, 5)]
37035pub fn _mm_mask_getmant_ss<
37036    const NORM: _MM_MANTISSA_NORM_ENUM,
37037    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37038>(
37039    src: __m128,
37040    k: __mmask8,
37041    a: __m128,
37042    b: __m128,
37043) -> __m128 {
37044    unsafe {
37045        static_assert_uimm_bits!(NORM, 4);
37046        static_assert_uimm_bits!(SIGN, 2);
37047        let a = a.as_f32x4();
37048        let b = b.as_f32x4();
37049        let src = src.as_f32x4();
37050        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
37051        transmute(r)
37052    }
37053}
37054
37055/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37056/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37057///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37058///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37059///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37060///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37061/// The sign is determined by sc which can take the following values:\
37062///    _MM_MANT_SIGN_src     // sign = sign(src)\
37063///    _MM_MANT_SIGN_zero    // sign = 0\
37064///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37065/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter of the `_round` variant of this intrinsic.
37066///
37067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
37068#[inline]
37069#[target_feature(enable = "avx512f")]
37070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37071#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
37072#[rustc_legacy_const_generics(3, 4)]
37073pub fn _mm_maskz_getmant_ss<
37074    const NORM: _MM_MANTISSA_NORM_ENUM,
37075    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37076>(
37077    k: __mmask8,
37078    a: __m128,
37079    b: __m128,
37080) -> __m128 {
37081    unsafe {
37082        static_assert_uimm_bits!(NORM, 4);
37083        static_assert_uimm_bits!(SIGN, 2);
37084        let a = a.as_f32x4();
37085        let b = b.as_f32x4();
37086        let r = vgetmantss(
37087            a,
37088            b,
37089            SIGN << 2 | NORM,
37090            f32x4::ZERO,
37091            k,
37092            _MM_FROUND_CUR_DIRECTION,
37093        );
37094        transmute(r)
37095    }
37096}
37097
37098/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37099/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37100///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37101///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37102///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37103///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37104/// The sign is determined by sc which can take the following values:\
37105///    _MM_MANT_SIGN_src     // sign = sign(src)\
37106///    _MM_MANT_SIGN_zero    // sign = 0\
37107///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37108/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter of the `_round` variant of this intrinsic.
37109///
37110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
37111#[inline]
37112#[target_feature(enable = "avx512f")]
37113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37114#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37115#[rustc_legacy_const_generics(2, 3)]
37116pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
37117    a: __m128d,
37118    b: __m128d,
37119) -> __m128d {
37120    unsafe {
37121        static_assert_uimm_bits!(NORM, 4);
37122        static_assert_uimm_bits!(SIGN, 2);
37123        let a = a.as_f64x2();
37124        let b = b.as_f64x2();
37125        let r = vgetmantsd(
37126            a,
37127            b,
37128            SIGN << 2 | NORM,
37129            f64x2::ZERO,
37130            0b1,
37131            _MM_FROUND_CUR_DIRECTION,
37132        );
37133        transmute(r)
37134    }
37135}
37136
37137/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37138/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37139///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37140///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37141///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37142///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37143/// The sign is determined by sc which can take the following values:\
37144///    _MM_MANT_SIGN_src     // sign = sign(src)\
37145///    _MM_MANT_SIGN_zero    // sign = 0\
37146///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37147/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter of the `_round` variant of this intrinsic.
37148///
37149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
37150#[inline]
37151#[target_feature(enable = "avx512f")]
37152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37153#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37154#[rustc_legacy_const_generics(4, 5)]
37155pub fn _mm_mask_getmant_sd<
37156    const NORM: _MM_MANTISSA_NORM_ENUM,
37157    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37158>(
37159    src: __m128d,
37160    k: __mmask8,
37161    a: __m128d,
37162    b: __m128d,
37163) -> __m128d {
37164    unsafe {
37165        static_assert_uimm_bits!(NORM, 4);
37166        static_assert_uimm_bits!(SIGN, 2);
37167        let a = a.as_f64x2();
37168        let b = b.as_f64x2();
37169        let src = src.as_f64x2();
37170        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
37171        transmute(r)
37172    }
37173}
37174
37175/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37177///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37178///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37179///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37180///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37181/// The sign is determined by sc which can take the following values:\
37182///    _MM_MANT_SIGN_src     // sign = sign(src)\
37183///    _MM_MANT_SIGN_zero    // sign = 0\
37184///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37185/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter of the `_round` variant of this intrinsic.
37186///
37187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
37188#[inline]
37189#[target_feature(enable = "avx512f")]
37190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37191#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37192#[rustc_legacy_const_generics(3, 4)]
37193pub fn _mm_maskz_getmant_sd<
37194    const NORM: _MM_MANTISSA_NORM_ENUM,
37195    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37196>(
37197    k: __mmask8,
37198    a: __m128d,
37199    b: __m128d,
37200) -> __m128d {
37201    unsafe {
37202        static_assert_uimm_bits!(NORM, 4);
37203        static_assert_uimm_bits!(SIGN, 2);
37204        let a = a.as_f64x2();
37205        let b = b.as_f64x2();
37206        let r = vgetmantsd(
37207            a,
37208            b,
37209            SIGN << 2 | NORM,
37210            f64x2::ZERO,
37211            k,
37212            _MM_FROUND_CUR_DIRECTION,
37213        );
37214        transmute(r)
37215    }
37216}
37217
37218/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37219/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37220/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37221/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37222/// * [`_MM_FROUND_TO_POS_INF`] : round up
37223/// * [`_MM_FROUND_TO_ZERO`] : truncate
37224/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37225///
37226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
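///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection. `IMM8 = 0` requests zero fraction bits with round-to-nearest:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_ss(0.0);
///             let b = _mm_set_ss(1.25);
///             // 1.25 rounded to the nearest integer (0 fraction bits) is 1.0.
///             let r = _mm_roundscale_ss::<0>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 1.0);
///         }
///     }
/// }
/// ```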
37227#[inline]
37228#[target_feature(enable = "avx512f")]
37229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37230#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
37231#[rustc_legacy_const_generics(2)]
37232pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
37233    unsafe {
37234        static_assert_uimm_bits!(IMM8, 8);
37235        let a = a.as_f32x4();
37236        let b = b.as_f32x4();
37237        let r = vrndscaless(
37238            a,
37239            b,
37240            f32x4::ZERO,
37241            0b11111111,
37242            IMM8,
37243            _MM_FROUND_CUR_DIRECTION,
37244        );
37245        transmute(r)
37246    }
37247}
37248
37249/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37250/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37251/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37252/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37253/// * [`_MM_FROUND_TO_POS_INF`] : round up
37254/// * [`_MM_FROUND_TO_ZERO`] : truncate
37255/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37256///
37257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
37258#[inline]
37259#[target_feature(enable = "avx512f")]
37260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37261#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37262#[rustc_legacy_const_generics(4)]
37263pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
37264    src: __m128,
37265    k: __mmask8,
37266    a: __m128,
37267    b: __m128,
37268) -> __m128 {
37269    unsafe {
37270        static_assert_uimm_bits!(IMM8, 8);
37271        let a = a.as_f32x4();
37272        let b = b.as_f32x4();
37273        let src = src.as_f32x4();
37274        let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37275        transmute(r)
37276    }
37277}
37278
37279/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37280/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37281/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37282/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37283/// * [`_MM_FROUND_TO_POS_INF`] : round up
37284/// * [`_MM_FROUND_TO_ZERO`] : truncate
37285/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37286///
37287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
37288#[inline]
37289#[target_feature(enable = "avx512f")]
37290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37291#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37292#[rustc_legacy_const_generics(3)]
37293pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37294    unsafe {
37295        static_assert_uimm_bits!(IMM8, 8);
37296        let a = a.as_f32x4();
37297        let b = b.as_f32x4();
37298        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37299        transmute(r)
37300    }
37301}
37302
37303/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37304/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37305/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37306/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37307/// * [`_MM_FROUND_TO_POS_INF`] : round up
37308/// * [`_MM_FROUND_TO_ZERO`] : truncate
37309/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37310///
37311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
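///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection; it uses the `_MM_FROUND_TO_POS_INF` rounding mode listed above with zero
/// fraction bits:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_sd(0.0);
///             let b = _mm_set_sd(1.25);
///             // Round 1.25 up to the next integer.
///             let r = _mm_roundscale_sd::<_MM_FROUND_TO_POS_INF>(a, b);
///             assert_eq!(_mm_cvtsd_f64(r), 2.0);
///         }
///     }
/// }
/// ```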
37312#[inline]
37313#[target_feature(enable = "avx512f")]
37314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37315#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
37316#[rustc_legacy_const_generics(2)]
37317pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
37318    unsafe {
37319        static_assert_uimm_bits!(IMM8, 8);
37320        let a = a.as_f64x2();
37321        let b = b.as_f64x2();
37322        let r = vrndscalesd(
37323            a,
37324            b,
37325            f64x2::ZERO,
37326            0b11111111,
37327            IMM8,
37328            _MM_FROUND_CUR_DIRECTION,
37329        );
37330        transmute(r)
37331    }
37332}
37333
37334/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37335/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37336/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37337/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37338/// * [`_MM_FROUND_TO_POS_INF`] : round up
37339/// * [`_MM_FROUND_TO_ZERO`] : truncate
37340/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37341///
37342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
37343#[inline]
37344#[target_feature(enable = "avx512f")]
37345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37346#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37347#[rustc_legacy_const_generics(4)]
37348pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
37349    src: __m128d,
37350    k: __mmask8,
37351    a: __m128d,
37352    b: __m128d,
37353) -> __m128d {
37354    unsafe {
37355        static_assert_uimm_bits!(IMM8, 8);
37356        let a = a.as_f64x2();
37357        let b = b.as_f64x2();
37358        let src = src.as_f64x2();
37359        let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37360        transmute(r)
37361    }
37362}
37363
37364/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37365/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37366/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37367/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37368/// * [`_MM_FROUND_TO_POS_INF`] : round up
37369/// * [`_MM_FROUND_TO_ZERO`] : truncate
37370/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37371///
37372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
37373#[inline]
37374#[target_feature(enable = "avx512f")]
37375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37376#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37377#[rustc_legacy_const_generics(3)]
37378pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37379    unsafe {
37380        static_assert_uimm_bits!(IMM8, 8);
37381        let a = a.as_f64x2();
37382        let b = b.as_f64x2();
37383        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37384        transmute(r)
37385    }
37386}
37387
37388/// Scale the lower single-precision (32-bit) floating-point element in a using the lower element of b (the result is a * 2^floor(b)), store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
37389///
37390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
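///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection and the SSE helpers `_mm_set_ss`/`_mm_cvtss_f32` for setup and inspection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_ss(3.0);
///             let b = _mm_set_ss(2.0);
///             // 3.0 * 2^floor(2.0) = 12.0 in the lower lane.
///             let r = _mm_scalef_ss(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 12.0);
///         }
///     }
/// }
/// ```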
37391#[inline]
37392#[target_feature(enable = "avx512f")]
37393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37394#[cfg_attr(test, assert_instr(vscalefss))]
37395pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
37396    unsafe {
37397        let a = a.as_f32x4();
37398        let b = b.as_f32x4();
37399        transmute(vscalefss(
37400            a,
37401            b,
37402            f32x4::ZERO,
37403            0b11111111,
37404            _MM_FROUND_CUR_DIRECTION,
37405        ))
37406    }
37407}
37408
37409/// Scale the lower single-precision (32-bit) floating-point element in a using the lower element of b (the result is a * 2^floor(b)), store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37410///
37411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
37412#[inline]
37413#[target_feature(enable = "avx512f")]
37414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37415#[cfg_attr(test, assert_instr(vscalefss))]
37416pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37417    unsafe {
37418        let a = a.as_f32x4();
37419        let b = b.as_f32x4();
37420        let src = src.as_f32x4();
37421        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
37422    }
37423}
37424
37425/// Scale the lower single-precision (32-bit) floating-point element in a using the lower element of b (the result is a * 2^floor(b)), store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37426///
37427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
37428#[inline]
37429#[target_feature(enable = "avx512f")]
37430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37431#[cfg_attr(test, assert_instr(vscalefss))]
37432pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37433    unsafe {
37434        transmute(vscalefss(
37435            a.as_f32x4(),
37436            b.as_f32x4(),
37437            f32x4::ZERO,
37438            k,
37439            _MM_FROUND_CUR_DIRECTION,
37440        ))
37441    }
37442}
37443
37444/// Scale the lower double-precision (64-bit) floating-point element in a using the lower element of b (the result is a * 2^floor(b)), store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
37445///
37446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
37447#[inline]
37448#[target_feature(enable = "avx512f")]
37449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37450#[cfg_attr(test, assert_instr(vscalefsd))]
37451pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
37452    unsafe {
37453        transmute(vscalefsd(
37454            a.as_f64x2(),
37455            b.as_f64x2(),
37456            f64x2::ZERO,
37457            0b11111111,
37458            _MM_FROUND_CUR_DIRECTION,
37459        ))
37460    }
37461}
37462
37463/// Scale the lower double-precision (64-bit) floating-point element in a using the lower element of b (the result is a * 2^floor(b)), store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37464///
37465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
37466#[inline]
37467#[target_feature(enable = "avx512f")]
37468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37469#[cfg_attr(test, assert_instr(vscalefsd))]
37470pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37471    unsafe {
37472        transmute(vscalefsd(
37473            a.as_f64x2(),
37474            b.as_f64x2(),
37475            src.as_f64x2(),
37476            k,
37477            _MM_FROUND_CUR_DIRECTION,
37478        ))
37479    }
37480}
37481
37482/// Scale the lower double-precision (64-bit) floating-point element in a using the lower element of b (the result is a * 2^floor(b)), store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37483///
37484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
37485#[inline]
37486#[target_feature(enable = "avx512f")]
37487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37488#[cfg_attr(test, assert_instr(vscalefsd))]
37489pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37490    unsafe {
37491        transmute(vscalefsd(
37492            a.as_f64x2(),
37493            b.as_f64x2(),
37494            f64x2::ZERO,
37495            k,
37496            _MM_FROUND_CUR_DIRECTION,
37497        ))
37498    }
37499}
37500
37501/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37502///
37503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
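///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection and the SSE helpers `_mm_set_ss`/`_mm_cvtss_f32` for setup and inspection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(1.0);
///             // Mask bit 0 set: 2.0 * 3.0 + 1.0 = 7.0.
///             assert_eq!(_mm_cvtss_f32(_mm_mask_fmadd_ss(a, 0b1, b, c)), 7.0);
///             // Mask bit 0 clear: the lower lane is copied from `a`.
///             assert_eq!(_mm_cvtss_f32(_mm_mask_fmadd_ss(a, 0b0, b, c)), 2.0);
///         }
///     }
/// }
/// ```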
37504#[inline]
37505#[target_feature(enable = "avx512f")]
37506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37507#[cfg_attr(test, assert_instr(vfmadd))]
37508pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37509    unsafe {
37510        let mut fmadd: f32 = simd_extract!(a, 0);
37511        if (k & 0b00000001) != 0 {
37512            let extractb: f32 = simd_extract!(b, 0);
37513            let extractc: f32 = simd_extract!(c, 0);
37514            fmadd = fmaf32(fmadd, extractb, extractc);
37515        }
37516        simd_insert!(a, 0, fmadd)
37517    }
37518}
37519
37520/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37521///
37522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
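///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection; it contrasts the zeromask behaviour with the set-mask case:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(1.0);
///             // Mask bit 0 set: 2.0 * 3.0 + 1.0 = 7.0.
///             assert_eq!(_mm_cvtss_f32(_mm_maskz_fmadd_ss(0b1, a, b, c)), 7.0);
///             // Mask bit 0 clear: the lower lane is zeroed.
///             assert_eq!(_mm_cvtss_f32(_mm_maskz_fmadd_ss(0b0, a, b, c)), 0.0);
///         }
///     }
/// }
/// ```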
37523#[inline]
37524#[target_feature(enable = "avx512f")]
37525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37526#[cfg_attr(test, assert_instr(vfmadd))]
37527pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37528    unsafe {
37529        let mut fmadd: f32 = 0.;
37530        if (k & 0b00000001) != 0 {
37531            let extracta: f32 = simd_extract!(a, 0);
37532            let extractb: f32 = simd_extract!(b, 0);
37533            let extractc: f32 = simd_extract!(c, 0);
37534            fmadd = fmaf32(extracta, extractb, extractc);
37535        }
37536        simd_insert!(a, 0, fmadd)
37537    }
37538}
37539
37540/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37541///
37542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
37543#[inline]
37544#[target_feature(enable = "avx512f")]
37545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37546#[cfg_attr(test, assert_instr(vfmadd))]
37547pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37548    unsafe {
37549        let mut fmadd: f32 = simd_extract!(c, 0);
37550        if (k & 0b00000001) != 0 {
37551            let extracta: f32 = simd_extract!(a, 0);
37552            let extractb: f32 = simd_extract!(b, 0);
37553            fmadd = fmaf32(extracta, extractb, fmadd);
37554        }
37555        simd_insert!(c, 0, fmadd)
37556    }
37557}
37558
37559/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37560///
37561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
37562#[inline]
37563#[target_feature(enable = "avx512f")]
37564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37565#[cfg_attr(test, assert_instr(vfmadd))]
37566pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37567    unsafe {
37568        let mut fmadd: f64 = simd_extract!(a, 0);
37569        if (k & 0b00000001) != 0 {
37570            let extractb: f64 = simd_extract!(b, 0);
37571            let extractc: f64 = simd_extract!(c, 0);
37572            fmadd = fmaf64(fmadd, extractb, extractc);
37573        }
37574        simd_insert!(a, 0, fmadd)
37575    }
37576}
37577
37578/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37579///
37580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
37581#[inline]
37582#[target_feature(enable = "avx512f")]
37583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37584#[cfg_attr(test, assert_instr(vfmadd))]
37585pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37586    unsafe {
37587        let mut fmadd: f64 = 0.;
37588        if (k & 0b00000001) != 0 {
37589            let extracta: f64 = simd_extract!(a, 0);
37590            let extractb: f64 = simd_extract!(b, 0);
37591            let extractc: f64 = simd_extract!(c, 0);
37592            fmadd = fmaf64(extracta, extractb, extractc);
37593        }
37594        simd_insert!(a, 0, fmadd)
37595    }
37596}
37597
37598/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37599///
37600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
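///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection; note that for the `mask3` form the fallback lane comes from `c`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_sd(2.0);
///             let b = _mm_set_sd(3.0);
///             let c = _mm_set_sd(1.0);
///             // Mask bit 0 set: 2.0 * 3.0 + 1.0 = 7.0.
///             assert_eq!(_mm_cvtsd_f64(_mm_mask3_fmadd_sd(a, b, c, 0b1)), 7.0);
///             // Mask bit 0 clear: the lower lane is copied from `c`.
///             assert_eq!(_mm_cvtsd_f64(_mm_mask3_fmadd_sd(a, b, c, 0b0)), 1.0);
///         }
///     }
/// }
/// ```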
37601#[inline]
37602#[target_feature(enable = "avx512f")]
37603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37604#[cfg_attr(test, assert_instr(vfmadd))]
37605pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37606    unsafe {
37607        let mut fmadd: f64 = simd_extract!(c, 0);
37608        if (k & 0b00000001) != 0 {
37609            let extracta: f64 = simd_extract!(a, 0);
37610            let extractb: f64 = simd_extract!(b, 0);
37611            fmadd = fmaf64(extracta, extractb, fmadd);
37612        }
37613        simd_insert!(c, 0, fmadd)
37614    }
37615}
37616
37617/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37618///
37619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
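///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection and the SSE helpers `_mm_set_ss`/`_mm_cvtss_f32` for setup and inspection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(1.0);
///             // Mask bit 0 set: 2.0 * 3.0 - 1.0 = 5.0.
///             assert_eq!(_mm_cvtss_f32(_mm_mask_fmsub_ss(a, 0b1, b, c)), 5.0);
///             // Mask bit 0 clear: the lower lane is copied from `a`.
///             assert_eq!(_mm_cvtss_f32(_mm_mask_fmsub_ss(a, 0b0, b, c)), 2.0);
///         }
///     }
/// }
/// ```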
37620#[inline]
37621#[target_feature(enable = "avx512f")]
37622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37623#[cfg_attr(test, assert_instr(vfmsub))]
37624pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37625    unsafe {
37626        let mut fmsub: f32 = simd_extract!(a, 0);
37627        if (k & 0b00000001) != 0 {
37628            let extractb: f32 = simd_extract!(b, 0);
37629            let extractc: f32 = simd_extract!(c, 0);
37630            let extractc = -extractc;
37631            fmsub = fmaf32(fmsub, extractb, extractc);
37632        }
37633        simd_insert!(a, 0, fmsub)
37634    }
37635}
37636
37637/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37638///
37639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
37640#[inline]
37641#[target_feature(enable = "avx512f")]
37642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37643#[cfg_attr(test, assert_instr(vfmsub))]
37644pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37645    unsafe {
37646        let mut fmsub: f32 = 0.;
37647        if (k & 0b00000001) != 0 {
37648            let extracta: f32 = simd_extract!(a, 0);
37649            let extractb: f32 = simd_extract!(b, 0);
37650            let extractc: f32 = simd_extract!(c, 0);
37651            let extractc = -extractc;
37652            fmsub = fmaf32(extracta, extractb, extractc);
37653        }
37654        simd_insert!(a, 0, fmsub)
37655    }
37656}
37657
37658/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37659///
37660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
37661#[inline]
37662#[target_feature(enable = "avx512f")]
37663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37664#[cfg_attr(test, assert_instr(vfmsub))]
37665pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37666    unsafe {
37667        let mut fmsub: f32 = simd_extract!(c, 0);
37668        if (k & 0b00000001) != 0 {
37669            let extracta: f32 = simd_extract!(a, 0);
37670            let extractb: f32 = simd_extract!(b, 0);
37671            let extractc = -fmsub;
37672            fmsub = fmaf32(extracta, extractb, extractc);
37673        }
37674        simd_insert!(c, 0, fmsub)
37675    }
37676}
37677
37678/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37679///
37680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
37681#[inline]
37682#[target_feature(enable = "avx512f")]
37683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37684#[cfg_attr(test, assert_instr(vfmsub))]
37685pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37686    unsafe {
37687        let mut fmsub: f64 = simd_extract!(a, 0);
37688        if (k & 0b00000001) != 0 {
37689            let extractb: f64 = simd_extract!(b, 0);
37690            let extractc: f64 = simd_extract!(c, 0);
37691            let extractc = -extractc;
37692            fmsub = fmaf64(fmsub, extractb, extractc);
37693        }
37694        simd_insert!(a, 0, fmsub)
37695    }
37696}
37697
37698/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37699///
37700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
37701#[inline]
37702#[target_feature(enable = "avx512f")]
37703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37704#[cfg_attr(test, assert_instr(vfmsub))]
37705pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37706    unsafe {
37707        let mut fmsub: f64 = 0.;
37708        if (k & 0b00000001) != 0 {
37709            let extracta: f64 = simd_extract!(a, 0);
37710            let extractb: f64 = simd_extract!(b, 0);
37711            let extractc: f64 = simd_extract!(c, 0);
37712            let extractc = -extractc;
37713            fmsub = fmaf64(extracta, extractb, extractc);
37714        }
37715        simd_insert!(a, 0, fmsub)
37716    }
37717}
37718
37719/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37720///
37721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
37722#[inline]
37723#[target_feature(enable = "avx512f")]
37724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37725#[cfg_attr(test, assert_instr(vfmsub))]
37726pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37727    unsafe {
37728        let mut fmsub: f64 = simd_extract!(c, 0);
37729        if (k & 0b00000001) != 0 {
37730            let extracta: f64 = simd_extract!(a, 0);
37731            let extractb: f64 = simd_extract!(b, 0);
37732            let extractc = -fmsub;
37733            fmsub = fmaf64(extracta, extractb, extractc);
37734        }
37735        simd_insert!(c, 0, fmsub)
37736    }
37737}
37738
37739/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37740///
37741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
37742#[inline]
37743#[target_feature(enable = "avx512f")]
37744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37745#[cfg_attr(test, assert_instr(vfnmadd))]
37746pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37747    unsafe {
37748        let mut fnmadd: f32 = simd_extract!(a, 0);
37749        if (k & 0b00000001) != 0 {
37750            let extracta = -fnmadd;
37751            let extractb: f32 = simd_extract!(b, 0);
37752            let extractc: f32 = simd_extract!(c, 0);
37753            fnmadd = fmaf32(extracta, extractb, extractc);
37754        }
37755        simd_insert!(a, 0, fnmadd)
37756    }
37757}
37758
37759/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37760///
37761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
37762#[inline]
37763#[target_feature(enable = "avx512f")]
37764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37765#[cfg_attr(test, assert_instr(vfnmadd))]
37766pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37767    unsafe {
37768        let mut fnmadd: f32 = 0.;
37769        if (k & 0b00000001) != 0 {
37770            let extracta: f32 = simd_extract!(a, 0);
37771            let extracta = -extracta;
37772            let extractb: f32 = simd_extract!(b, 0);
37773            let extractc: f32 = simd_extract!(c, 0);
37774            fnmadd = fmaf32(extracta, extractb, extractc);
37775        }
37776        simd_insert!(a, 0, fnmadd)
37777    }
37778}
37779
37780/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37781///
37782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
37783#[inline]
37784#[target_feature(enable = "avx512f")]
37785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37786#[cfg_attr(test, assert_instr(vfnmadd))]
37787pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37788    unsafe {
37789        let mut fnmadd: f32 = simd_extract!(c, 0);
37790        if (k & 0b00000001) != 0 {
37791            let extracta: f32 = simd_extract!(a, 0);
37792            let extracta = -extracta;
37793            let extractb: f32 = simd_extract!(b, 0);
37794            fnmadd = fmaf32(extracta, extractb, fnmadd);
37795        }
37796        simd_insert!(c, 0, fnmadd)
37797    }
37798}
37799
37800/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37801///
37802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
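///
/// An illustrative sketch (not from the upstream docs), assuming an x86_64 target with runtime
/// `avx512f` detection and the SSE2 helpers `_mm_set_sd`/`_mm_cvtsd_f64` for setup and inspection:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` support was just verified at runtime.
///         unsafe {
///             let a = _mm_set_sd(2.0);
///             let b = _mm_set_sd(3.0);
///             let c = _mm_set_sd(1.0);
///             // Mask bit 0 set: -(2.0 * 3.0) + 1.0 = -5.0.
///             assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmadd_sd(a, 0b1, b, c)), -5.0);
///             // Mask bit 0 clear: the lower lane is copied from `a`.
///             assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmadd_sd(a, 0b0, b, c)), 2.0);
///         }
///     }
/// }
/// ```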
37803#[inline]
37804#[target_feature(enable = "avx512f")]
37805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37806#[cfg_attr(test, assert_instr(vfnmadd))]
37807pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37808    unsafe {
37809        let mut fnmadd: f64 = simd_extract!(a, 0);
37810        if (k & 0b00000001) != 0 {
37811            let extracta = -fnmadd;
37812            let extractb: f64 = simd_extract!(b, 0);
37813            let extractc: f64 = simd_extract!(c, 0);
37814            fnmadd = fmaf64(extracta, extractb, extractc);
37815        }
37816        simd_insert!(a, 0, fnmadd)
37817    }
37818}
37819
37820/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37821///
37822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
37823#[inline]
37824#[target_feature(enable = "avx512f")]
37825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37826#[cfg_attr(test, assert_instr(vfnmadd))]
37827pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37828    unsafe {
37829        let mut fnmadd: f64 = 0.;
37830        if (k & 0b00000001) != 0 {
37831            let extracta: f64 = simd_extract!(a, 0);
37832            let extracta = -extracta;
37833            let extractb: f64 = simd_extract!(b, 0);
37834            let extractc: f64 = simd_extract!(c, 0);
37835            fnmadd = fmaf64(extracta, extractb, extractc);
37836        }
37837        simd_insert!(a, 0, fnmadd)
37838    }
37839}
37840
37841/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37842///
37843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
37844#[inline]
37845#[target_feature(enable = "avx512f")]
37846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37847#[cfg_attr(test, assert_instr(vfnmadd))]
37848pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37849    unsafe {
37850        let mut fnmadd: f64 = simd_extract!(c, 0);
37851        if (k & 0b00000001) != 0 {
37852            let extracta: f64 = simd_extract!(a, 0);
37853            let extracta = -extracta;
37854            let extractb: f64 = simd_extract!(b, 0);
37855            fnmadd = fmaf64(extracta, extractb, fnmadd);
37856        }
37857        simd_insert!(c, 0, fnmadd)
37858    }
37859}
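
// Illustrative sketch (hypothetical helper, not in the test suite): the `mask3`
// variant blends into `c` rather than `a`, so with mask bit 0 clear the lower
// lane keeps `c[0]`, and the upper lane always comes from `c`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm_mask3_fnmadd_sd_sketch() {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(10.0);
    // mask bit 0 set: -(2.0 * 3.0) + 10.0 == 4.0
    assert_eq!(_mm_cvtsd_f64(_mm_mask3_fnmadd_sd(a, b, c, 0b1)), 4.0);
    // mask bit 0 clear: lower lane kept from `c`
    assert_eq!(_mm_cvtsd_f64(_mm_mask3_fnmadd_sd(a, b, c, 0b0)), 10.0);
}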
37860
37861/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37862///
37863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
37864#[inline]
37865#[target_feature(enable = "avx512f")]
37866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37867#[cfg_attr(test, assert_instr(vfnmsub))]
37868pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37869    unsafe {
37870        let mut fnmsub: f32 = simd_extract!(a, 0);
37871        if (k & 0b00000001) != 0 {
37872            let extracta = -fnmsub;
37873            let extractb: f32 = simd_extract!(b, 0);
37874            let extractc: f32 = simd_extract!(c, 0);
37875            let extractc = -extractc;
37876            fnmsub = fmaf32(extracta, extractb, extractc);
37877        }
37878        simd_insert!(a, 0, fnmsub)
37879    }
37880}
37881
37882/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37883///
37884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
37885#[inline]
37886#[target_feature(enable = "avx512f")]
37887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37888#[cfg_attr(test, assert_instr(vfnmsub))]
37889pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37890    unsafe {
37891        let mut fnmsub: f32 = 0.;
37892        if (k & 0b00000001) != 0 {
37893            let extracta: f32 = simd_extract!(a, 0);
37894            let extracta = -extracta;
37895            let extractb: f32 = simd_extract!(b, 0);
37896            let extractc: f32 = simd_extract!(c, 0);
37897            let extractc = -extractc;
37898            fnmsub = fmaf32(extracta, extractb, extractc);
37899        }
37900        simd_insert!(a, 0, fnmsub)
37901    }
37902}
37903
37904/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37905///
37906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
37907#[inline]
37908#[target_feature(enable = "avx512f")]
37909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37910#[cfg_attr(test, assert_instr(vfnmsub))]
37911pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37912    unsafe {
37913        let mut fnmsub: f32 = simd_extract!(c, 0);
37914        if (k & 0b00000001) != 0 {
37915            let extracta: f32 = simd_extract!(a, 0);
37916            let extracta = -extracta;
37917            let extractb: f32 = simd_extract!(b, 0);
37918            let extractc = -fnmsub;
37919            fnmsub = fmaf32(extracta, extractb, extractc);
37920        }
37921        simd_insert!(c, 0, fnmsub)
37922    }
37923}
37924
37925/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37926///
37927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
37928#[inline]
37929#[target_feature(enable = "avx512f")]
37930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37931#[cfg_attr(test, assert_instr(vfnmsub))]
37932pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37933    unsafe {
37934        let mut fnmsub: f64 = simd_extract!(a, 0);
37935        if (k & 0b00000001) != 0 {
37936            let extracta = -fnmsub;
37937            let extractb: f64 = simd_extract!(b, 0);
37938            let extractc: f64 = simd_extract!(c, 0);
37939            let extractc = -extractc;
37940            fnmsub = fmaf64(extracta, extractb, extractc);
37941        }
37942        simd_insert!(a, 0, fnmsub)
37943    }
37944}
37945
37946/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37947///
37948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
37949#[inline]
37950#[target_feature(enable = "avx512f")]
37951#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37952#[cfg_attr(test, assert_instr(vfnmsub))]
37953pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37954    unsafe {
37955        let mut fnmsub: f64 = 0.;
37956        if (k & 0b00000001) != 0 {
37957            let extracta: f64 = simd_extract!(a, 0);
37958            let extracta = -extracta;
37959            let extractb: f64 = simd_extract!(b, 0);
37960            let extractc: f64 = simd_extract!(c, 0);
37961            let extractc = -extractc;
37962            fnmsub = fmaf64(extracta, extractb, extractc);
37963        }
37964        simd_insert!(a, 0, fnmsub)
37965    }
37966}
37967
37968/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37969///
37970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
37971#[inline]
37972#[target_feature(enable = "avx512f")]
37973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37974#[cfg_attr(test, assert_instr(vfnmsub))]
37975pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37976    unsafe {
37977        let mut fnmsub: f64 = simd_extract!(c, 0);
37978        if (k & 0b00000001) != 0 {
37979            let extracta: f64 = simd_extract!(a, 0);
37980            let extracta = -extracta;
37981            let extractb: f64 = simd_extract!(b, 0);
37982            let extractc = -fnmsub;
37983            fnmsub = fmaf64(extracta, extractb, extractc);
37984        }
37985        simd_insert!(c, 0, fnmsub)
37986    }
37987}
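
// Illustrative sketch (hypothetical helper): `fnmsub` negates the product and
// then also subtracts `c`, i.e. the lower lane becomes `-(a[0] * b[0]) - c[0]`
// when mask bit 0 is set; otherwise the `mask3` form keeps the lane from `c`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm_mask3_fnmsub_sd_sketch() {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    // mask bit 0 set: -(2.0 * 3.0) - 1.0 == -7.0
    assert_eq!(_mm_cvtsd_f64(_mm_mask3_fnmsub_sd(a, b, c, 0b1)), -7.0);
    // mask bit 0 clear: lower lane kept from `c`
    assert_eq!(_mm_cvtsd_f64(_mm_mask3_fnmsub_sd(a, b, c, 0b0)), 1.0);
}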
37988
37989/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37990///
37991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37997///
37998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
37999#[inline]
38000#[target_feature(enable = "avx512f")]
38001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38002#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38003#[rustc_legacy_const_generics(2)]
38004pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38005    unsafe {
38006        static_assert_rounding!(ROUNDING);
38007        let a = a.as_f32x4();
38008        let b = b.as_f32x4();
38009        let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38010        transmute(r)
38011    }
38012}
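
// Illustrative sketch (hypothetical helper): the rounding mode is passed as a
// const generic, typically one of the `_MM_FROUND_*` combinations listed above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm_add_round_ss_sketch() {
    let a = _mm_set_ss(1.5);
    let b = _mm_set_ss(2.25);
    let r = _mm_add_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
    // exactly representable, so every rounding mode yields 3.75
    assert_eq!(_mm_cvtss_f32(r), 3.75);
}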
38013
38014/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38015///
38016/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38017/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38018/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38019/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38020/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38021/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38022///
38023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
38024#[inline]
38025#[target_feature(enable = "avx512f")]
38026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38027#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38028#[rustc_legacy_const_generics(4)]
38029pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
38030    src: __m128,
38031    k: __mmask8,
38032    a: __m128,
38033    b: __m128,
38034) -> __m128 {
38035    unsafe {
38036        static_assert_rounding!(ROUNDING);
38037        let a = a.as_f32x4();
38038        let b = b.as_f32x4();
38039        let src = src.as_f32x4();
38040        let r = vaddss(a, b, src, k, ROUNDING);
38041        transmute(r)
38042    }
38043}
38044
38045/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38046///
38047/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38048/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38049/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38050/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38051/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38052/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38053///
38054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
38055#[inline]
38056#[target_feature(enable = "avx512f")]
38057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38058#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38059#[rustc_legacy_const_generics(3)]
38060pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38061    unsafe {
38062        static_assert_rounding!(ROUNDING);
38063        let a = a.as_f32x4();
38064        let b = b.as_f32x4();
38065        let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
38066        transmute(r)
38067    }
38068}
38069
38070/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38071///
38072/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38073/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38074/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38075/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38076/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38077/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38078///
38079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
38080#[inline]
38081#[target_feature(enable = "avx512f")]
38082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38083#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38084#[rustc_legacy_const_generics(2)]
38085pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38086    unsafe {
38087        static_assert_rounding!(ROUNDING);
38088        let a = a.as_f64x2();
38089        let b = b.as_f64x2();
38090        let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38091        transmute(r)
38092    }
38093}
38094
38095/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38096///
38097/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38098/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38099/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38100/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38101/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38102/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38103///
38104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
38105#[inline]
38106#[target_feature(enable = "avx512f")]
38107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38108#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38109#[rustc_legacy_const_generics(4)]
38110pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
38111    src: __m128d,
38112    k: __mmask8,
38113    a: __m128d,
38114    b: __m128d,
38115) -> __m128d {
38116    unsafe {
38117        static_assert_rounding!(ROUNDING);
38118        let a = a.as_f64x2();
38119        let b = b.as_f64x2();
38120        let src = src.as_f64x2();
38121        let r = vaddsd(a, b, src, k, ROUNDING);
38122        transmute(r)
38123    }
38124}
38125
38126/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38127///
38128/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38129/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38130/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38131/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38132/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38133/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38134///
38135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
38136#[inline]
38137#[target_feature(enable = "avx512f")]
38138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38139#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38140#[rustc_legacy_const_generics(3)]
38141pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38142    unsafe {
38143        static_assert_rounding!(ROUNDING);
38144        let a = a.as_f64x2();
38145        let b = b.as_f64x2();
38146        let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
38147        transmute(r)
38148    }
38149}
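
// Illustrative sketch (hypothetical helper): with the zeromask clear, the lower
// lane of the result is 0.0 rather than a copy of any source operand; the upper
// lane still comes from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm_maskz_add_round_sd_sketch() {
    let a = _mm_set_sd(1.5);
    let b = _mm_set_sd(2.5);
    let sum = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0b1, a, b);
    assert_eq!(_mm_cvtsd_f64(sum), 4.0);
    let zeroed = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0b0, a, b);
    assert_eq!(_mm_cvtsd_f64(zeroed), 0.0);
}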
38150
38151/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38152///
38153/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38154/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38155/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38156/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38157/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38158/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38159///
38160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
38161#[inline]
38162#[target_feature(enable = "avx512f")]
38163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38164#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38165#[rustc_legacy_const_generics(2)]
38166pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38167    unsafe {
38168        static_assert_rounding!(ROUNDING);
38169        let a = a.as_f32x4();
38170        let b = b.as_f32x4();
38171        let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38172        transmute(r)
38173    }
38174}
38175
38176/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38177///
38178/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38179/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38180/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38181/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38182/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38183/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38184///
38185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
38186#[inline]
38187#[target_feature(enable = "avx512f")]
38188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38189#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38190#[rustc_legacy_const_generics(4)]
38191pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
38192    src: __m128,
38193    k: __mmask8,
38194    a: __m128,
38195    b: __m128,
38196) -> __m128 {
38197    unsafe {
38198        static_assert_rounding!(ROUNDING);
38199        let a = a.as_f32x4();
38200        let b = b.as_f32x4();
38201        let src = src.as_f32x4();
38202        let r = vsubss(a, b, src, k, ROUNDING);
38203        transmute(r)
38204    }
38205}
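
// Illustrative sketch (hypothetical helper): when mask bit 0 is clear, the lower
// lane of the result is taken from `src`, not from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm_mask_sub_round_ss_sketch() {
    let src = _mm_set_ss(-1.0);
    let a = _mm_set_ss(5.0);
    let b = _mm_set_ss(1.5);
    // mask bit 0 set: 5.0 - 1.5 == 3.5
    let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, 0b1, a, b);
    assert_eq!(_mm_cvtss_f32(r), 3.5);
    // mask bit 0 clear: lower lane falls back to `src`
    let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, 0b0, a, b);
    assert_eq!(_mm_cvtss_f32(r), -1.0);
}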
38206
38207/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38208///
38209/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38210/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38211/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38212/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38213/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38214/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38215///
38216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
38217#[inline]
38218#[target_feature(enable = "avx512f")]
38219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38220#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38221#[rustc_legacy_const_generics(3)]
38222pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38223    unsafe {
38224        static_assert_rounding!(ROUNDING);
38225        let a = a.as_f32x4();
38226        let b = b.as_f32x4();
38227        let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
38228        transmute(r)
38229    }
38230}
38231
38232/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38233///
38234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38240///
38241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
38242#[inline]
38243#[target_feature(enable = "avx512f")]
38244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38245#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38246#[rustc_legacy_const_generics(2)]
38247pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38248    unsafe {
38249        static_assert_rounding!(ROUNDING);
38250        let a = a.as_f64x2();
38251        let b = b.as_f64x2();
38252        let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38253        transmute(r)
38254    }
38255}
38256
38257/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38258///
38259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38265///
38266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
38267#[inline]
38268#[target_feature(enable = "avx512f")]
38269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38270#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38271#[rustc_legacy_const_generics(4)]
38272pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
38273    src: __m128d,
38274    k: __mmask8,
38275    a: __m128d,
38276    b: __m128d,
38277) -> __m128d {
38278    unsafe {
38279        static_assert_rounding!(ROUNDING);
38280        let a = a.as_f64x2();
38281        let b = b.as_f64x2();
38282        let src = src.as_f64x2();
38283        let r = vsubsd(a, b, src, k, ROUNDING);
38284        transmute(r)
38285    }
38286}
38287
38288/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38289///
38290/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38291/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38292/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38293/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38294/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38295/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38296///
38297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
38298#[inline]
38299#[target_feature(enable = "avx512f")]
38300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38301#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38302#[rustc_legacy_const_generics(3)]
38303pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38304    unsafe {
38305        static_assert_rounding!(ROUNDING);
38306        let a = a.as_f64x2();
38307        let b = b.as_f64x2();
38308        let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
38309        transmute(r)
38310    }
38311}
38312
38313/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38314///
38315/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38316/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38317/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38318/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38319/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38320/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38321///
38322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
38323#[inline]
38324#[target_feature(enable = "avx512f")]
38325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38326#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38327#[rustc_legacy_const_generics(2)]
38328pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38329    unsafe {
38330        static_assert_rounding!(ROUNDING);
38331        let a = a.as_f32x4();
38332        let b = b.as_f32x4();
38333        let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38334        transmute(r)
38335    }
38336}
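
// Illustrative sketch (hypothetical helper): `_MM_FROUND_CUR_DIRECTION` defers to
// the rounding mode currently set in `MXCSR.RC` instead of encoding one statically.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm_mul_round_ss_sketch() {
    let a = _mm_set_ss(1.5);
    let b = _mm_set_ss(4.0);
    let r = _mm_mul_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(a, b);
    // exactly representable, so the MXCSR rounding mode does not matter here
    assert_eq!(_mm_cvtss_f32(r), 6.0);
}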
38337
38338/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38339///
38340/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38341/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38342/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38343/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38344/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38345/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38346///
38347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
38348#[inline]
38349#[target_feature(enable = "avx512f")]
38350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38351#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38352#[rustc_legacy_const_generics(4)]
38353pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
38354    src: __m128,
38355    k: __mmask8,
38356    a: __m128,
38357    b: __m128,
38358) -> __m128 {
38359    unsafe {
38360        static_assert_rounding!(ROUNDING);
38361        let a = a.as_f32x4();
38362        let b = b.as_f32x4();
38363        let src = src.as_f32x4();
38364        let r = vmulss(a, b, src, k, ROUNDING);
38365        transmute(r)
38366    }
38367}
38368
38369/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38370///
38371/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38372/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38373/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38374/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38375/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38376/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38377///
38378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
38379#[inline]
38380#[target_feature(enable = "avx512f")]
38381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38382#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38383#[rustc_legacy_const_generics(3)]
38384pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38385    unsafe {
38386        static_assert_rounding!(ROUNDING);
38387        let a = a.as_f32x4();
38388        let b = b.as_f32x4();
38389        let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
38390        transmute(r)
38391    }
38392}
38393
38394/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38395///
38396/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38397/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38398/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38399/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38400/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38401/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38402///
38403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
38404#[inline]
38405#[target_feature(enable = "avx512f")]
38406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38407#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38408#[rustc_legacy_const_generics(2)]
38409pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38410    unsafe {
38411        static_assert_rounding!(ROUNDING);
38412        let a = a.as_f64x2();
38413        let b = b.as_f64x2();
38414        let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38415        transmute(r)
38416    }
38417}
38418
38419/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38420///
38421/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38422/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38423/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38424/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38425/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38426/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38427///
38428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
38429#[inline]
38430#[target_feature(enable = "avx512f")]
38431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38432#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38433#[rustc_legacy_const_generics(4)]
38434pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
38435    src: __m128d,
38436    k: __mmask8,
38437    a: __m128d,
38438    b: __m128d,
38439) -> __m128d {
38440    unsafe {
38441        static_assert_rounding!(ROUNDING);
38442        let a = a.as_f64x2();
38443        let b = b.as_f64x2();
38444        let src = src.as_f64x2();
38445        let r = vmulsd(a, b, src, k, ROUNDING);
38446        transmute(r)
38447    }
38448}
38449
38450/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38451///
38452/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38453/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38454/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38455/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38456/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38457/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38458///
38459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
38460#[inline]
38461#[target_feature(enable = "avx512f")]
38462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38463#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38464#[rustc_legacy_const_generics(3)]
38465pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38466    unsafe {
38467        static_assert_rounding!(ROUNDING);
38468        let a = a.as_f64x2();
38469        let b = b.as_f64x2();
38470        let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
38471        transmute(r)
38472    }
38473}
38474
38475/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38476///
38477/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38478/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38479/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38480/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38481/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38482/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38483///
38484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
38485#[inline]
38486#[target_feature(enable = "avx512f")]
38487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38488#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38489#[rustc_legacy_const_generics(2)]
38490pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38491    unsafe {
38492        static_assert_rounding!(ROUNDING);
38493        let a = a.as_f32x4();
38494        let b = b.as_f32x4();
38495        let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38496        transmute(r)
38497    }
38498}
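
// Illustrative sketch (hypothetical helper): for a quotient that is not exactly
// representable, the statically encoded rounding mode is observable; rounding
// toward +inf gives a strictly larger lower lane than rounding toward -inf.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm_div_round_ss_sketch() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(3.0);
    let up = _mm_div_round_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
    let down = _mm_div_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
    // 1.0 / 3.0 is inexact in binary, so the two modes differ by one ulp
    assert!(_mm_cvtss_f32(up) > _mm_cvtss_f32(down));
}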
38499
38500/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38501///
38502/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38503/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38504/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38505/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38506/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38507/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38508///
38509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
38510#[inline]
38511#[target_feature(enable = "avx512f")]
38512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38513#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38514#[rustc_legacy_const_generics(4)]
38515pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
38516    src: __m128,
38517    k: __mmask8,
38518    a: __m128,
38519    b: __m128,
38520) -> __m128 {
38521    unsafe {
38522        static_assert_rounding!(ROUNDING);
38523        let a = a.as_f32x4();
38524        let b = b.as_f32x4();
38525        let src = src.as_f32x4();
38526        let r = vdivss(a, b, src, k, ROUNDING);
38527        transmute(r)
38528    }
38529}
38530
38531/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38532///
38533/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38534/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38535/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38536/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38537/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38538/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38539///
38540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
38541#[inline]
38542#[target_feature(enable = "avx512f")]
38543#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38544#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38545#[rustc_legacy_const_generics(3)]
38546pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38547    unsafe {
38548        static_assert_rounding!(ROUNDING);
38549        let a = a.as_f32x4();
38550        let b = b.as_f32x4();
38551        let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
38552        transmute(r)
38553    }
38554}
38555
38556/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38557///
38558/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38559/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38560/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38561/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38562/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38563/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38564///
38565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
38566#[inline]
38567#[target_feature(enable = "avx512f")]
38568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38569#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38570#[rustc_legacy_const_generics(2)]
38571pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38572    unsafe {
38573        static_assert_rounding!(ROUNDING);
38574        let a = a.as_f64x2();
38575        let b = b.as_f64x2();
38576        let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38577        transmute(r)
38578    }
38579}
38580
38581/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38582///
38583/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38584/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38585/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38586/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38587/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38588/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38589///
38590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
38591#[inline]
38592#[target_feature(enable = "avx512f")]
38593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38594#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38595#[rustc_legacy_const_generics(4)]
38596pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
38597    src: __m128d,
38598    k: __mmask8,
38599    a: __m128d,
38600    b: __m128d,
38601) -> __m128d {
38602    unsafe {
38603        static_assert_rounding!(ROUNDING);
38604        let a = a.as_f64x2();
38605        let b = b.as_f64x2();
38606        let src = src.as_f64x2();
38607        let r = vdivsd(a, b, src, k, ROUNDING);
38608        transmute(r)
38609    }
38610}
38611
38612/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38613///
38614/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38615/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38616/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38617/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38618/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38619/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38620///
38621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
38622#[inline]
38623#[target_feature(enable = "avx512f")]
38624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38625#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38626#[rustc_legacy_const_generics(3)]
38627pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38628    unsafe {
38629        static_assert_rounding!(ROUNDING);
38630        let a = a.as_f64x2();
38631        let b = b.as_f64x2();
38632        let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
38633        transmute(r)
38634    }
38635}
38636
38637/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38638/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38639///
38640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
38641#[inline]
38642#[target_feature(enable = "avx512f")]
38643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38644#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38645#[rustc_legacy_const_generics(2)]
38646pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38647    unsafe {
38648        static_assert_sae!(SAE);
38649        let a = a.as_f32x4();
38650        let b = b.as_f32x4();
38651        let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
38652        transmute(r)
38653    }
38654}
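
// Illustrative sketch (hypothetical helper): the `max`/`min` variants take an SAE
// parameter rather than a rounding mode; `_MM_FROUND_NO_EXC` suppresses
// floating-point exceptions, `_MM_FROUND_CUR_DIRECTION` leaves reporting as-is.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm_max_round_ss_sketch() {
    let a = _mm_set_ss(3.0);
    let b = _mm_set_ss(7.0);
    let r = _mm_max_round_ss::<{ _MM_FROUND_NO_EXC }>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 7.0);
}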
38655
38656/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38657/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38658///
38659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
38660#[inline]
38661#[target_feature(enable = "avx512f")]
38662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38663#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38664#[rustc_legacy_const_generics(4)]
38665pub fn _mm_mask_max_round_ss<const SAE: i32>(
38666    src: __m128,
38667    k: __mmask8,
38668    a: __m128,
38669    b: __m128,
38670) -> __m128 {
38671    unsafe {
38672        static_assert_sae!(SAE);
38673        let a = a.as_f32x4();
38674        let b = b.as_f32x4();
38675        let src = src.as_f32x4();
38676        let r = vmaxss(a, b, src, k, SAE);
38677        transmute(r)
38678    }
38679}
38680
38681/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38682/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38683///
38684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
38685#[inline]
38686#[target_feature(enable = "avx512f")]
38687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38688#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38689#[rustc_legacy_const_generics(3)]
38690pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38691    unsafe {
38692        static_assert_sae!(SAE);
38693        let a = a.as_f32x4();
38694        let b = b.as_f32x4();
38695        let r = vmaxss(a, b, f32x4::ZERO, k, SAE);
38696        transmute(r)
38697    }
38698}
38699
38700/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38701/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38702///
38703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38704#[inline]
38705#[target_feature(enable = "avx512f")]
38706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38707#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38708#[rustc_legacy_const_generics(2)]
38709pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38710    unsafe {
38711        static_assert_sae!(SAE);
38712        let a = a.as_f64x2();
38713        let b = b.as_f64x2();
38714        let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
38715        transmute(r)
38716    }
38717}
38718
38719/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38720/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38721///
38722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
38723#[inline]
38724#[target_feature(enable = "avx512f")]
38725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38726#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38727#[rustc_legacy_const_generics(4)]
38728pub fn _mm_mask_max_round_sd<const SAE: i32>(
38729    src: __m128d,
38730    k: __mmask8,
38731    a: __m128d,
38732    b: __m128d,
38733) -> __m128d {
38734    unsafe {
38735        static_assert_sae!(SAE);
38736        let a = a.as_f64x2();
38737        let b = b.as_f64x2();
38738        let src = src.as_f64x2();
38739        let r = vmaxsd(a, b, src, k, SAE);
38740        transmute(r)
38741    }
38742}
38743
38744/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38745/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38746///
38747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
38748#[inline]
38749#[target_feature(enable = "avx512f")]
38750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38751#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38752#[rustc_legacy_const_generics(3)]
38753pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38754    unsafe {
38755        static_assert_sae!(SAE);
38756        let a = a.as_f64x2();
38757        let b = b.as_f64x2();
38758        let r = vmaxsd(a, b, f64x2::ZERO, k, SAE);
38759        transmute(r)
38760    }
38761}
38762
38763/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38764/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38765///
38766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
38767#[inline]
38768#[target_feature(enable = "avx512f")]
38769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38770#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38771#[rustc_legacy_const_generics(2)]
38772pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38773    unsafe {
38774        static_assert_sae!(SAE);
38775        let a = a.as_f32x4();
38776        let b = b.as_f32x4();
38777        let r = vminss(a, b, f32x4::ZERO, 0b1, SAE);
38778        transmute(r)
38779    }
38780}
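
// Illustrative sketch (hypothetical helper): same shape as the `max` variants,
// but the lower lane of the result is the smaller of the two lower lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm_min_round_ss_sketch() {
    let a = _mm_set_ss(3.0);
    let b = _mm_set_ss(7.0);
    let r = _mm_min_round_ss::<{ _MM_FROUND_NO_EXC }>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 3.0);
}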
38781
38782/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38783/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38784///
38785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_round_ss&expand=3780)
38786#[inline]
38787#[target_feature(enable = "avx512f")]
38788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38789#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38790#[rustc_legacy_const_generics(4)]
38791pub fn _mm_mask_min_round_ss<const SAE: i32>(
38792    src: __m128,
38793    k: __mmask8,
38794    a: __m128,
38795    b: __m128,
38796) -> __m128 {
38797    unsafe {
38798        static_assert_sae!(SAE);
38799        let a = a.as_f32x4();
38800        let b = b.as_f32x4();
38801        let src = src.as_f32x4();
38802        let r = vminss(a, b, src, k, SAE);
38803        transmute(r)
38804    }
38805}
38806
38807/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38808/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38809///
38810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_round_ss&expand=3781)
38811#[inline]
38812#[target_feature(enable = "avx512f")]
38813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38814#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38815#[rustc_legacy_const_generics(3)]
38816pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38817    unsafe {
38818        static_assert_sae!(SAE);
38819        let a = a.as_f32x4();
38820        let b = b.as_f32x4();
38821        let r = vminss(a, b, f32x4::ZERO, k, SAE);
38822        transmute(r)
38823    }
38824}
38825
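// A minimal sketch (illustrative only, not from the upstream tests): it contrasts the
// merge-masking and zero-masking forms of the scalar single-precision minimum above.
// The helper name and values are assumptions for the example; compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_min_round_ss() {
    let src = _mm_set_ss(9.0);
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(5.0);
    // Mask bit 0 set: the lower lane is min(2.0, 5.0) = 2.0.
    let merged = _mm_mask_min_round_ss::<_MM_FROUND_NO_EXC>(src, 0b1, a, b);
    assert_eq!(_mm_cvtss_f32(merged), 2.0);
    // Mask bit 0 clear: the lower lane is copied from `src` (9.0) instead.
    let kept = _mm_mask_min_round_ss::<_MM_FROUND_NO_EXC>(src, 0b0, a, b);
    assert_eq!(_mm_cvtss_f32(kept), 9.0);
}
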
38826/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38827/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38828///
38829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_round_sd&expand=3779)
38830#[inline]
38831#[target_feature(enable = "avx512f")]
38832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38833#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38834#[rustc_legacy_const_generics(2)]
38835pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38836    unsafe {
38837        static_assert_sae!(SAE);
38838        let a = a.as_f64x2();
38839        let b = b.as_f64x2();
38840        let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
38841        transmute(r)
38842    }
38843}
38844
38845/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38846/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38847///
38848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_round_sd&expand=3777)
38849#[inline]
38850#[target_feature(enable = "avx512f")]
38851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38852#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38853#[rustc_legacy_const_generics(4)]
38854pub fn _mm_mask_min_round_sd<const SAE: i32>(
38855    src: __m128d,
38856    k: __mmask8,
38857    a: __m128d,
38858    b: __m128d,
38859) -> __m128d {
38860    unsafe {
38861        static_assert_sae!(SAE);
38862        let a = a.as_f64x2();
38863        let b = b.as_f64x2();
38864        let src = src.as_f64x2();
38865        let r = vminsd(a, b, src, k, SAE);
38866        transmute(r)
38867    }
38868}
38869
38870/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38871/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38872///
38873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_round_sd&expand=3778)
38874#[inline]
38875#[target_feature(enable = "avx512f")]
38876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38877#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38878#[rustc_legacy_const_generics(3)]
38879pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38880    unsafe {
38881        static_assert_sae!(SAE);
38882        let a = a.as_f64x2();
38883        let b = b.as_f64x2();
38884        let r = vminsd(a, b, f64x2::ZERO, k, SAE);
38885        transmute(r)
38886    }
38887}
38888
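// Illustrative zero-masking sketch for the double-precision minimum above (not from the
// upstream tests); the inputs are arbitrary and it is compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_min_round_sd() {
    let a = _mm_set_sd(4.0);
    let b = _mm_set_sd(-1.0);
    // Mask bit 0 set: the lower lane is min(4.0, -1.0) = -1.0.
    let r = _mm_maskz_min_round_sd::<_MM_FROUND_NO_EXC>(0b1, a, b);
    assert_eq!(_mm_cvtsd_f64(r), -1.0);
}
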
38889/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38890///
38891/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38892/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38893/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38894/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38895/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38896/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38897///
38898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_round_ss&expand=5383)
38899#[inline]
38900#[target_feature(enable = "avx512f")]
38901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38902#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38903#[rustc_legacy_const_generics(2)]
38904pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38905    unsafe {
38906        static_assert_rounding!(ROUNDING);
38907        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
38908    }
38909}
38910
38911/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38912///
38913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38919///
38920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_round_ss&expand=5381)
38921#[inline]
38922#[target_feature(enable = "avx512f")]
38923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38924#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38925#[rustc_legacy_const_generics(4)]
38926pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
38927    src: __m128,
38928    k: __mmask8,
38929    a: __m128,
38930    b: __m128,
38931) -> __m128 {
38932    unsafe {
38933        static_assert_rounding!(ROUNDING);
38934        vsqrtss(a, b, src, k, ROUNDING)
38935    }
38936}
38937
38938/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38939///
38940/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38941/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38942/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38943/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38944/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38945/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38946///
38947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_round_ss&expand=5382)
38948#[inline]
38949#[target_feature(enable = "avx512f")]
38950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38951#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38952#[rustc_legacy_const_generics(3)]
38953pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38954    unsafe {
38955        static_assert_rounding!(ROUNDING);
38956        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
38957    }
38958}
38959
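// Rounding-parameter sketch for the scalar square-root intrinsics above (illustrative only,
// not upstream code): the constant combination and the inputs are assumptions chosen for
// the example; compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_sqrt_round_ss() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(9.0);
    // Lower lane: sqrt(9.0) = 3.0; the upper three lanes are copied from `a`.
    let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 3.0);
}
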
38960/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38961///
38962/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38963/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38964/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38965/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38966/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38967/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38968///
38969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_round_sd&expand=5380)
38970#[inline]
38971#[target_feature(enable = "avx512f")]
38972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38973#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38974#[rustc_legacy_const_generics(2)]
38975pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38976    unsafe {
38977        static_assert_rounding!(ROUNDING);
38978        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
38979    }
38980}
38981
38982/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38983///
38984/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38985/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38986/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38987/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38988/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38989/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38990///
38991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_round_sd&expand=5378)
38992#[inline]
38993#[target_feature(enable = "avx512f")]
38994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38995#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38996#[rustc_legacy_const_generics(4)]
38997pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
38998    src: __m128d,
38999    k: __mmask8,
39000    a: __m128d,
39001    b: __m128d,
39002) -> __m128d {
39003    unsafe {
39004        static_assert_rounding!(ROUNDING);
39005        vsqrtsd(a, b, src, k, ROUNDING)
39006    }
39007}
39008
39009/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39010///
39011/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39012/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39013/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39014/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39015/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39016/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39017///
39018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_round_sd&expand=5379)
39019#[inline]
39020#[target_feature(enable = "avx512f")]
39021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39022#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
39023#[rustc_legacy_const_generics(3)]
39024pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
39025    k: __mmask8,
39026    a: __m128d,
39027    b: __m128d,
39028) -> __m128d {
39029    unsafe {
39030        static_assert_rounding!(ROUNDING);
39031        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
39032    }
39033}
39034
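// Same idea for the double-precision square root (illustrative sketch, arbitrary inputs,
// compiled only under `cfg(test)`); sqrt(4.0) is exact, so any rounding mode gives 2.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_sqrt_round_sd() {
    let a = _mm_set_sd(1.0);
    let b = _mm_set_sd(4.0);
    // Mask bit 0 set: the lower lane is sqrt(4.0) = 2.0; clearing it would zero the lane.
    let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b1, a, b);
    assert_eq!(_mm_cvtsd_f64(r), 2.0);
}
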
39035/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39036/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39037///
39038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_round_ss&expand=2856)
39039#[inline]
39040#[target_feature(enable = "avx512f")]
39041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39042#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39043#[rustc_legacy_const_generics(2)]
39044pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39045    unsafe {
39046        static_assert_sae!(SAE);
39047        let a = a.as_f32x4();
39048        let b = b.as_f32x4();
39049        let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
39050        transmute(r)
39051    }
39052}
39053
39054/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39055/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39056///
39057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_round_ss&expand=2857)
39058#[inline]
39059#[target_feature(enable = "avx512f")]
39060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39061#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39062#[rustc_legacy_const_generics(4)]
39063pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
39064    src: __m128,
39065    k: __mmask8,
39066    a: __m128,
39067    b: __m128,
39068) -> __m128 {
39069    unsafe {
39070        static_assert_sae!(SAE);
39071        let a = a.as_f32x4();
39072        let b = b.as_f32x4();
39073        let src = src.as_f32x4();
39074        let r = vgetexpss(a, b, src, k, SAE);
39075        transmute(r)
39076    }
39077}
39078
39079/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39080/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39081///
39082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_round_ss&expand=2858)
39083#[inline]
39084#[target_feature(enable = "avx512f")]
39085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39086#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39087#[rustc_legacy_const_generics(3)]
39088pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39089    unsafe {
39090        static_assert_sae!(SAE);
39091        let a = a.as_f32x4();
39092        let b = b.as_f32x4();
39093        let r = vgetexpss(a, b, f32x4::ZERO, k, SAE);
39094        transmute(r)
39095    }
39096}
39097
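// Illustrative sketch for the scalar getexp intrinsics above (not upstream code): the lower
// lane of the result is floor(log2(|x|)) of the lower lane of `b`, returned as a float.
// Inputs are arbitrary; compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_getexp_round_ss() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(8.0);
    // floor(log2(8.0)) = 3, so the lower lane of the result is 3.0.
    let r = _mm_getexp_round_ss::<_MM_FROUND_NO_EXC>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 3.0);
}
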
39098/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39099/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39100///
39101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_round_sd&expand=2853)
39102#[inline]
39103#[target_feature(enable = "avx512f")]
39104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39105#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39106#[rustc_legacy_const_generics(2)]
39107pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39108    unsafe {
39109        static_assert_sae!(SAE);
39110        let a = a.as_f64x2();
39111        let b = b.as_f64x2();
39112        let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
39113        transmute(r)
39114    }
39115}
39116
39117/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39118/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39119///
39120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_round_sd&expand=2854)
39121#[inline]
39122#[target_feature(enable = "avx512f")]
39123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39124#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39125#[rustc_legacy_const_generics(4)]
39126pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
39127    src: __m128d,
39128    k: __mmask8,
39129    a: __m128d,
39130    b: __m128d,
39131) -> __m128d {
39132    unsafe {
39133        static_assert_sae!(SAE);
39134        let a = a.as_f64x2();
39135        let b = b.as_f64x2();
39136        let src = src.as_f64x2();
39137        let r = vgetexpsd(a, b, src, k, SAE);
39138        transmute(r)
39139    }
39140}
39141
39142/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39143/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39144///
39145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_round_sd&expand=2855)
39146#[inline]
39147#[target_feature(enable = "avx512f")]
39148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39149#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39150#[rustc_legacy_const_generics(3)]
39151pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39152    unsafe {
39153        static_assert_sae!(SAE);
39154        let a = a.as_f64x2();
39155        let b = b.as_f64x2();
39156        let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
39157        transmute(r)
39158    }
39159}
39160
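// Double-precision counterpart (illustrative only, arbitrary value, `cfg(test)`):
// floor(log2(32.0)) = 5, returned as 5.0 in the lower lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_getexp_round_sd() {
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(32.0);
    let r = _mm_getexp_round_sd::<_MM_FROUND_NO_EXC>(a, b);
    assert_eq!(_mm_cvtsd_f64(r), 5.0);
}
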
39161/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39162/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39163///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39164///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39165///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39166///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39167/// The sign is determined by sc which can take the following values:\
39168///    _MM_MANT_SIGN_src     // sign = sign(src)\
39169///    _MM_MANT_SIGN_zero    // sign = 0\
39170///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39171/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39172///
39173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_round_ss&expand=2892)
39174#[inline]
39175#[target_feature(enable = "avx512f")]
39176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39177#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39178#[rustc_legacy_const_generics(2, 3, 4)]
39179pub fn _mm_getmant_round_ss<
39180    const NORM: _MM_MANTISSA_NORM_ENUM,
39181    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39182    const SAE: i32,
39183>(
39184    a: __m128,
39185    b: __m128,
39186) -> __m128 {
39187    unsafe {
39188        static_assert_uimm_bits!(NORM, 4);
39189        static_assert_uimm_bits!(SIGN, 2);
39190        static_assert_mantissas_sae!(SAE);
39191        let a = a.as_f32x4();
39192        let b = b.as_f32x4();
39193        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
39194        transmute(r)
39195    }
39196}
39197
39198/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39199/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39200///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39201///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39202///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39203///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39204/// The sign is determined by sc which can take the following values:\
39205///    _MM_MANT_SIGN_src     // sign = sign(src)\
39206///    _MM_MANT_SIGN_zero    // sign = 0\
39207///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39209///
39210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_round_ss&expand=2893)
39211#[inline]
39212#[target_feature(enable = "avx512f")]
39213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39214#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39215#[rustc_legacy_const_generics(4, 5, 6)]
39216pub fn _mm_mask_getmant_round_ss<
39217    const NORM: _MM_MANTISSA_NORM_ENUM,
39218    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39219    const SAE: i32,
39220>(
39221    src: __m128,
39222    k: __mmask8,
39223    a: __m128,
39224    b: __m128,
39225) -> __m128 {
39226    unsafe {
39227        static_assert_uimm_bits!(NORM, 4);
39228        static_assert_uimm_bits!(SIGN, 2);
39229        static_assert_mantissas_sae!(SAE);
39230        let a = a.as_f32x4();
39231        let b = b.as_f32x4();
39232        let src = src.as_f32x4();
39233        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
39234        transmute(r)
39235    }
39236}
39237
39238/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39239/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39240///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39241///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39242///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39243///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39244/// The sign is determined by sc which can take the following values:\
39245///    _MM_MANT_SIGN_src     // sign = sign(src)\
39246///    _MM_MANT_SIGN_zero    // sign = 0\
39247///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39248/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39249///
39250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_round_ss&expand=2894)
39251#[inline]
39252#[target_feature(enable = "avx512f")]
39253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39254#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39255#[rustc_legacy_const_generics(3, 4, 5)]
39256pub fn _mm_maskz_getmant_round_ss<
39257    const NORM: _MM_MANTISSA_NORM_ENUM,
39258    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39259    const SAE: i32,
39260>(
39261    k: __mmask8,
39262    a: __m128,
39263    b: __m128,
39264) -> __m128 {
39265    unsafe {
39266        static_assert_uimm_bits!(NORM, 4);
39267        static_assert_uimm_bits!(SIGN, 2);
39268        static_assert_mantissas_sae!(SAE);
39269        let a = a.as_f32x4();
39270        let b = b.as_f32x4();
39271        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
39272        transmute(r)
39273    }
39274}
39275
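// Illustrative sketch for the scalar getmant intrinsics above (not upstream code): with the
// [1, 2) normalization interval and the source sign, 12.0 = 1.5 * 2^3 normalizes to 1.5.
// The `_MM_MANT_NORM_1_2` / `_MM_MANT_SIGN_SRC` constants are assumed to be the ones defined
// elsewhere in this module; inputs are arbitrary and the helper is compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_getmant_round_ss() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(12.0);
    let r = _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 1.5);
}
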
39276/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39277/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39278///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39279///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39280///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39281///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39282/// The sign is determined by sc which can take the following values:\
39283///    _MM_MANT_SIGN_src     // sign = sign(src)\
39284///    _MM_MANT_SIGN_zero    // sign = 0\
39285///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39286/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39287///
39288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_round_sd&expand=2889)
39289#[inline]
39290#[target_feature(enable = "avx512f")]
39291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39292#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39293#[rustc_legacy_const_generics(2, 3, 4)]
39294pub fn _mm_getmant_round_sd<
39295    const NORM: _MM_MANTISSA_NORM_ENUM,
39296    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39297    const SAE: i32,
39298>(
39299    a: __m128d,
39300    b: __m128d,
39301) -> __m128d {
39302    unsafe {
39303        static_assert_uimm_bits!(NORM, 4);
39304        static_assert_uimm_bits!(SIGN, 2);
39305        static_assert_mantissas_sae!(SAE);
39306        let a = a.as_f64x2();
39307        let b = b.as_f64x2();
39308        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
39309        transmute(r)
39310    }
39311}
39312
39313/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39314/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39315///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39316///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39317///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39318///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39319/// The sign is determined by sc which can take the following values:\
39320///    _MM_MANT_SIGN_src     // sign = sign(src)\
39321///    _MM_MANT_SIGN_zero    // sign = 0\
39322///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39323/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39324///
39325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_round_sd&expand=2890)
39326#[inline]
39327#[target_feature(enable = "avx512f")]
39328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39329#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39330#[rustc_legacy_const_generics(4, 5, 6)]
39331pub fn _mm_mask_getmant_round_sd<
39332    const NORM: _MM_MANTISSA_NORM_ENUM,
39333    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39334    const SAE: i32,
39335>(
39336    src: __m128d,
39337    k: __mmask8,
39338    a: __m128d,
39339    b: __m128d,
39340) -> __m128d {
39341    unsafe {
39342        static_assert_uimm_bits!(NORM, 4);
39343        static_assert_uimm_bits!(SIGN, 2);
39344        static_assert_mantissas_sae!(SAE);
39345        let a = a.as_f64x2();
39346        let b = b.as_f64x2();
39347        let src = src.as_f64x2();
39348        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
39349        transmute(r)
39350    }
39351}
39352
39353/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39354/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39355///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39356///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39357///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39358///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39359/// The sign is determined by sc which can take the following values:\
39360///    _MM_MANT_SIGN_src     // sign = sign(src)\
39361///    _MM_MANT_SIGN_zero    // sign = 0\
39362///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39363/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39364///
39365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_round_sd&expand=2891)
39366#[inline]
39367#[target_feature(enable = "avx512f")]
39368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39369#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39370#[rustc_legacy_const_generics(3, 4, 5)]
39371pub fn _mm_maskz_getmant_round_sd<
39372    const NORM: _MM_MANTISSA_NORM_ENUM,
39373    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39374    const SAE: i32,
39375>(
39376    k: __mmask8,
39377    a: __m128d,
39378    b: __m128d,
39379) -> __m128d {
39380    unsafe {
39381        static_assert_uimm_bits!(NORM, 4);
39382        static_assert_uimm_bits!(SIGN, 2);
39383        static_assert_mantissas_sae!(SAE);
39384        let a = a.as_f64x2();
39385        let b = b.as_f64x2();
39386        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
39387        transmute(r)
39388    }
39389}
39390
39391/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39392/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39393/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39394/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39395/// * [`_MM_FROUND_TO_POS_INF`] : round up
39396/// * [`_MM_FROUND_TO_ZERO`] : truncate
39397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39398///
39399/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_round_ss&expand=4796)
39401#[inline]
39402#[target_feature(enable = "avx512f")]
39403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39404#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39405#[rustc_legacy_const_generics(2, 3)]
39406pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39407    unsafe {
39408        static_assert_uimm_bits!(IMM8, 8);
39409        static_assert_mantissas_sae!(SAE);
39410        let a = a.as_f32x4();
39411        let b = b.as_f32x4();
39412        let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
39413        transmute(r)
39414    }
39415}
39416
39417/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39418/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39419/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39420/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39421/// * [`_MM_FROUND_TO_POS_INF`] : round up
39422/// * [`_MM_FROUND_TO_ZERO`] : truncate
39423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39424///
39425/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_round_ss&expand=4794)
39427#[inline]
39428#[target_feature(enable = "avx512f")]
39429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39430#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39431#[rustc_legacy_const_generics(4, 5)]
39432pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39433    src: __m128,
39434    k: __mmask8,
39435    a: __m128,
39436    b: __m128,
39437) -> __m128 {
39438    unsafe {
39439        static_assert_uimm_bits!(IMM8, 8);
39440        static_assert_mantissas_sae!(SAE);
39441        let a = a.as_f32x4();
39442        let b = b.as_f32x4();
39443        let src = src.as_f32x4();
39444        let r = vrndscaless(a, b, src, k, IMM8, SAE);
39445        transmute(r)
39446    }
39447}
39448
39449/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39450/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39451/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39452/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39453/// * [`_MM_FROUND_TO_POS_INF`] : round up
39454/// * [`_MM_FROUND_TO_ZERO`] : truncate
39455/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39456///
39457/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_round_ss&expand=4795)
39459#[inline]
39460#[target_feature(enable = "avx512f")]
39461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39462#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39463#[rustc_legacy_const_generics(3, 4)]
39464pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39465    k: __mmask8,
39466    a: __m128,
39467    b: __m128,
39468) -> __m128 {
39469    unsafe {
39470        static_assert_uimm_bits!(IMM8, 8);
39471        static_assert_mantissas_sae!(SAE);
39472        let a = a.as_f32x4();
39473        let b = b.as_f32x4();
39474        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
39475        transmute(r)
39476    }
39477}
39478
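// Illustrative sketch for the scalar roundscale intrinsics above (not upstream code):
// IMM8 = 0 requests zero fraction bits with round-to-nearest-even, so 2.5 becomes 2.0.
// Inputs are arbitrary; compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_roundscale_round_ss() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(2.5);
    let r = _mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 2.0);
}
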
39479/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39480/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39481/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39482/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39483/// * [`_MM_FROUND_TO_POS_INF`] : round up
39484/// * [`_MM_FROUND_TO_ZERO`] : truncate
39485/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39486///
39487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_round_sd&expand=4793)
39489#[inline]
39490#[target_feature(enable = "avx512f")]
39491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39492#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39493#[rustc_legacy_const_generics(2, 3)]
39494pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39495    unsafe {
39496        static_assert_uimm_bits!(IMM8, 8);
39497        static_assert_mantissas_sae!(SAE);
39498        let a = a.as_f64x2();
39499        let b = b.as_f64x2();
39500        let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
39501        transmute(r)
39502    }
39503}
39504
39505/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39506/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39507/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39508/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39509/// * [`_MM_FROUND_TO_POS_INF`] : round up
39510/// * [`_MM_FROUND_TO_ZERO`] : truncate
39511/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39512///
39513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_round_sd&expand=4791)
39515#[inline]
39516#[target_feature(enable = "avx512f")]
39517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39518#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39519#[rustc_legacy_const_generics(4, 5)]
39520pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39521    src: __m128d,
39522    k: __mmask8,
39523    a: __m128d,
39524    b: __m128d,
39525) -> __m128d {
39526    unsafe {
39527        static_assert_uimm_bits!(IMM8, 8);
39528        static_assert_mantissas_sae!(SAE);
39529        let a = a.as_f64x2();
39530        let b = b.as_f64x2();
39531        let src = src.as_f64x2();
39532        let r = vrndscalesd(a, b, src, k, IMM8, SAE);
39533        transmute(r)
39534    }
39535}
39536
39537/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39538/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39539/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39540/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39541/// * [`_MM_FROUND_TO_POS_INF`] : round up
39542/// * [`_MM_FROUND_TO_ZERO`] : truncate
39543/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39544///
39545/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_round_sd&expand=4792)
39547#[inline]
39548#[target_feature(enable = "avx512f")]
39549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39550#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39551#[rustc_legacy_const_generics(3, 4)]
39552pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39553    k: __mmask8,
39554    a: __m128d,
39555    b: __m128d,
39556) -> __m128d {
39557    unsafe {
39558        static_assert_uimm_bits!(IMM8, 8);
39559        static_assert_mantissas_sae!(SAE);
39560        let a = a.as_f64x2();
39561        let b = b.as_f64x2();
39562        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
39563        transmute(r)
39564    }
39565}
39566
39567/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39568///
39569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39575///
39576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_round_ss&expand=4895)
39577#[inline]
39578#[target_feature(enable = "avx512f")]
39579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39580#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39581#[rustc_legacy_const_generics(2)]
39582pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39583    unsafe {
39584        static_assert_rounding!(ROUNDING);
39585        let a = a.as_f32x4();
39586        let b = b.as_f32x4();
39587        let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
39588        transmute(r)
39589    }
39590}
39591
39592/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39593///
39594/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39595/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39596/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39597/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39598/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39599/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39600///
39601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_round_ss&expand=4893)
39602#[inline]
39603#[target_feature(enable = "avx512f")]
39604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39605#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39606#[rustc_legacy_const_generics(4)]
39607pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39608    src: __m128,
39609    k: __mmask8,
39610    a: __m128,
39611    b: __m128,
39612) -> __m128 {
39613    unsafe {
39614        static_assert_rounding!(ROUNDING);
39615        let a = a.as_f32x4();
39616        let b = b.as_f32x4();
39617        let src = src.as_f32x4();
39618        let r = vscalefss(a, b, src, k, ROUNDING);
39619        transmute(r)
39620    }
39621}
39622
39623/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39624///
39625/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39626/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39627/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39628/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39629/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39630/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39631///
39632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_round_ss&expand=4894)
39633#[inline]
39634#[target_feature(enable = "avx512f")]
39635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39636#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39637#[rustc_legacy_const_generics(3)]
39638pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39639    unsafe {
39640        static_assert_rounding!(ROUNDING);
39641        let a = a.as_f32x4();
39642        let b = b.as_f32x4();
39643        let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
39644        transmute(r)
39645    }
39646}
39647
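// Illustrative sketch for the scalar scalef intrinsics above (not upstream code): the lower
// lane becomes a * 2^floor(b), so 3.0 scaled by 2^2 gives 12.0. Inputs are arbitrary and the
// helper is compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_scalef_round_ss() {
    let a = _mm_set_ss(3.0);
    let b = _mm_set_ss(2.0);
    let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 12.0);
}
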
39648/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39649///
39650/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39651/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39652/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39653/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39654/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39655/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39656///
39657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_round_sd&expand=4892)
39658#[inline]
39659#[target_feature(enable = "avx512f")]
39660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39661#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39662#[rustc_legacy_const_generics(2)]
39663pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39664    unsafe {
39665        static_assert_rounding!(ROUNDING);
39666        let a = a.as_f64x2();
39667        let b = b.as_f64x2();
39668        let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
39669        transmute(r)
39670    }
39671}
39672
39673/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39674///
39675/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39676/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39677/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39678/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39679/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39680/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39681///
39682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_round_sd&expand=4890)
39683#[inline]
39684#[target_feature(enable = "avx512f")]
39685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39686#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39687#[rustc_legacy_const_generics(4)]
39688pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
39689    src: __m128d,
39690    k: __mmask8,
39691    a: __m128d,
39692    b: __m128d,
39693) -> __m128d {
39694    unsafe {
        static_assert_rounding!(ROUNDING);
39695        let a = a.as_f64x2();
39696        let b = b.as_f64x2();
39697        let src = src.as_f64x2();
39698        let r = vscalefsd(a, b, src, k, ROUNDING);
39699        transmute(r)
39700    }
39701}
39702
39703/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39704///
39705/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39706/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39707/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39708/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39709/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39710/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39711///
39712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_round_sd&expand=4891)
39713#[inline]
39714#[target_feature(enable = "avx512f")]
39715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39716#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39717#[rustc_legacy_const_generics(3)]
39718pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
39719    k: __mmask8,
39720    a: __m128d,
39721    b: __m128d,
39722) -> __m128d {
39723    unsafe {
39724        static_assert_rounding!(ROUNDING);
39725        let a = a.as_f64x2();
39726        let b = b.as_f64x2();
39727        let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
39728        transmute(r)
39729    }
39730}
39731
39732/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39733///
39734/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39735/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39736/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39737/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39738/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39739/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39740///
39741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_round_ss&expand=2573)
39742#[inline]
39743#[target_feature(enable = "avx512f")]
39744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39745#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39746#[rustc_legacy_const_generics(3)]
39747pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39748    unsafe {
39749        static_assert_rounding!(ROUNDING);
39750        let extracta: f32 = simd_extract!(a, 0);
39751        let extractb: f32 = simd_extract!(b, 0);
39752        let extractc: f32 = simd_extract!(c, 0);
39753        let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39754        simd_insert!(a, 0, r)
39755    }
39756}
39757
39758/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39759///
39760/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39761/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39762/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39763/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39764/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39765/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39766///
39767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_round_ss&expand=2574)
39768#[inline]
39769#[target_feature(enable = "avx512f")]
39770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39771#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39772#[rustc_legacy_const_generics(4)]
39773pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
39774    a: __m128,
39775    k: __mmask8,
39776    b: __m128,
39777    c: __m128,
39778) -> __m128 {
39779    unsafe {
39780        static_assert_rounding!(ROUNDING);
39781        let mut fmadd: f32 = simd_extract!(a, 0);
39782        if (k & 0b00000001) != 0 {
39783            let extractb: f32 = simd_extract!(b, 0);
39784            let extractc: f32 = simd_extract!(c, 0);
39785            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
39786        }
39787        simd_insert!(a, 0, fmadd)
39788    }
39789}
39790
39791/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39792///
39793/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39794/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39795/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39796/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39797/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39798/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39799///
39800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
39801#[inline]
39802#[target_feature(enable = "avx512f")]
39803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39804#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39805#[rustc_legacy_const_generics(4)]
39806pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
39807    k: __mmask8,
39808    a: __m128,
39809    b: __m128,
39810    c: __m128,
39811) -> __m128 {
39812    unsafe {
39813        static_assert_rounding!(ROUNDING);
39814        let mut fmadd: f32 = 0.;
39815        if (k & 0b00000001) != 0 {
39816            let extracta: f32 = simd_extract!(a, 0);
39817            let extractb: f32 = simd_extract!(b, 0);
39818            let extractc: f32 = simd_extract!(c, 0);
39819            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39820        }
39821        simd_insert!(a, 0, fmadd)
39822    }
39823}
39824
39825/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
39826///
39827/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39828/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39829/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39830/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39831/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39833///
39834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
39835#[inline]
39836#[target_feature(enable = "avx512f")]
39837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39838#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39839#[rustc_legacy_const_generics(4)]
39840pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
39841    a: __m128,
39842    b: __m128,
39843    c: __m128,
39844    k: __mmask8,
39845) -> __m128 {
39846    unsafe {
39847        static_assert_rounding!(ROUNDING);
39848        let mut fmadd: f32 = simd_extract!(c, 0);
39849        if (k & 0b00000001) != 0 {
39850            let extracta: f32 = simd_extract!(a, 0);
39851            let extractb: f32 = simd_extract!(b, 0);
39852            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
39853        }
39854        simd_insert!(c, 0, fmadd)
39855    }
39856}
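
// Sketch only (assumed helper, not crate code): how the three masked variants
// above differ when mask bit 0 is clear, so no fused multiply-add is performed
// on lane 0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fmadd_round_ss_mask_variants(
    a: __m128,
    b: __m128,
    c: __m128,
) -> (__m128, __m128, __m128) {
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    let k: __mmask8 = 0; // bit 0 clear: the fused multiply-add is skipped
    (
        _mm_mask_fmadd_round_ss::<R>(a, k, b, c),  // lane 0 kept from `a`
        _mm_maskz_fmadd_round_ss::<R>(k, a, b, c), // lane 0 zeroed
        _mm_mask3_fmadd_round_ss::<R>(a, b, c, k), // lane 0 kept from `c`
    )
}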
39857
39858/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39859///
39860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39866///
39867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
39868#[inline]
39869#[target_feature(enable = "avx512f")]
39870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39871#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39872#[rustc_legacy_const_generics(3)]
39873pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39874    unsafe {
39875        static_assert_rounding!(ROUNDING);
39876        let extracta: f64 = simd_extract!(a, 0);
39877        let extractb: f64 = simd_extract!(b, 0);
39878        let extractc: f64 = simd_extract!(c, 0);
39879        let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39880        simd_insert!(a, 0, fmadd)
39881    }
39882}
39883
39884/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39885///
39886/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39887/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39888/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39889/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39890/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39891/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39892///
39893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
39894#[inline]
39895#[target_feature(enable = "avx512f")]
39896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39897#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39898#[rustc_legacy_const_generics(4)]
39899pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
39900    a: __m128d,
39901    k: __mmask8,
39902    b: __m128d,
39903    c: __m128d,
39904) -> __m128d {
39905    unsafe {
39906        static_assert_rounding!(ROUNDING);
39907        let mut fmadd: f64 = simd_extract!(a, 0);
39908        if (k & 0b00000001) != 0 {
39909            let extractb: f64 = simd_extract!(b, 0);
39910            let extractc: f64 = simd_extract!(c, 0);
39911            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
39912        }
39913        simd_insert!(a, 0, fmadd)
39914    }
39915}
39916
39917/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39918///
39919/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39920/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39921/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39922/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39923/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39924/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39925///
39926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
39927#[inline]
39928#[target_feature(enable = "avx512f")]
39929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39930#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39931#[rustc_legacy_const_generics(4)]
39932pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
39933    k: __mmask8,
39934    a: __m128d,
39935    b: __m128d,
39936    c: __m128d,
39937) -> __m128d {
39938    unsafe {
39939        static_assert_rounding!(ROUNDING);
39940        let mut fmadd: f64 = 0.;
39941        if (k & 0b00000001) != 0 {
39942            let extracta: f64 = simd_extract!(a, 0);
39943            let extractb: f64 = simd_extract!(b, 0);
39944            let extractc: f64 = simd_extract!(c, 0);
39945            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39946        }
39947        simd_insert!(a, 0, fmadd)
39948    }
39949}
39950
39951/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39952///
39953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39959///
39960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
39961#[inline]
39962#[target_feature(enable = "avx512f")]
39963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39964#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39965#[rustc_legacy_const_generics(4)]
39966pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
39967    a: __m128d,
39968    b: __m128d,
39969    c: __m128d,
39970    k: __mmask8,
39971) -> __m128d {
39972    unsafe {
39973        static_assert_rounding!(ROUNDING);
39974        let mut fmadd: f64 = simd_extract!(c, 0);
39975        if (k & 0b00000001) != 0 {
39976            let extracta: f64 = simd_extract!(a, 0);
39977            let extractb: f64 = simd_extract!(b, 0);
39978            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
39979        }
39980        simd_insert!(c, 0, fmadd)
39981    }
39982}
39983
39984/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39985///
39986/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39987/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39988/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39989/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39990/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39991/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39992///
39993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
39994#[inline]
39995#[target_feature(enable = "avx512f")]
39996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39997#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39998#[rustc_legacy_const_generics(3)]
39999pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40000    unsafe {
40001        static_assert_rounding!(ROUNDING);
40002        let extracta: f32 = simd_extract!(a, 0);
40003        let extractb: f32 = simd_extract!(b, 0);
40004        let extractc: f32 = simd_extract!(c, 0);
40005        let extractc = -extractc;
40006        let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40007        simd_insert!(a, 0, fmsub)
40008    }
40009}
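
// Sketch under the same assumptions as the example after `_mm_fmadd_round_ss`:
// fmsub is the fused multiply-add with the addend negated, so lane 0 becomes
// a[0] * b[0] - c[0], here truncated toward zero with exceptions suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
    _mm_fmsub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
}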
40010
40011/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40012///
40013/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40014/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40015/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40016/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40017/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40018/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40019///
40020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
40021#[inline]
40022#[target_feature(enable = "avx512f")]
40023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40024#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40025#[rustc_legacy_const_generics(4)]
40026pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
40027    a: __m128,
40028    k: __mmask8,
40029    b: __m128,
40030    c: __m128,
40031) -> __m128 {
40032    unsafe {
40033        static_assert_rounding!(ROUNDING);
40034        let mut fmsub: f32 = simd_extract!(a, 0);
40035        if (k & 0b00000001) != 0 {
40036            let extractb: f32 = simd_extract!(b, 0);
40037            let extractc: f32 = simd_extract!(c, 0);
40038            let extractc = -extractc;
40039            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
40040        }
40041        simd_insert!(a, 0, fmsub)
40042    }
40043}
40044
40045/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40046///
40047/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40048/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40049/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40050/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40051/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40052/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40053///
40054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
40055#[inline]
40056#[target_feature(enable = "avx512f")]
40057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40058#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40059#[rustc_legacy_const_generics(4)]
40060pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
40061    k: __mmask8,
40062    a: __m128,
40063    b: __m128,
40064    c: __m128,
40065) -> __m128 {
40066    unsafe {
40067        static_assert_rounding!(ROUNDING);
40068        let mut fmsub: f32 = 0.;
40069        if (k & 0b00000001) != 0 {
40070            let extracta: f32 = simd_extract!(a, 0);
40071            let extractb: f32 = simd_extract!(b, 0);
40072            let extractc: f32 = simd_extract!(c, 0);
40073            let extractc = -extractc;
40074            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40075        }
40076        simd_insert!(a, 0, fmsub)
40077    }
40078}
40079
40080/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40081///
40082/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40083/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40084/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40085/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40086/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40087/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40088///
40089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
40090#[inline]
40091#[target_feature(enable = "avx512f")]
40092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40093#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40094#[rustc_legacy_const_generics(4)]
40095pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
40096    a: __m128,
40097    b: __m128,
40098    c: __m128,
40099    k: __mmask8,
40100) -> __m128 {
40101    unsafe {
40102        static_assert_rounding!(ROUNDING);
40103        let mut fmsub: f32 = simd_extract!(c, 0);
40104        if (k & 0b00000001) != 0 {
40105            let extracta: f32 = simd_extract!(a, 0);
40106            let extractb: f32 = simd_extract!(b, 0);
40107            let extractc = -fmsub;
40108            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40109        }
40110        simd_insert!(c, 0, fmsub)
40111    }
40112}
40113
40114/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40115///
40116/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40117/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40118/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40119/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40120/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40121/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40122///
40123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
40124#[inline]
40125#[target_feature(enable = "avx512f")]
40126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40127#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40128#[rustc_legacy_const_generics(3)]
40129pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40130    unsafe {
40131        static_assert_rounding!(ROUNDING);
40132        let extracta: f64 = simd_extract!(a, 0);
40133        let extractb: f64 = simd_extract!(b, 0);
40134        let extractc: f64 = simd_extract!(c, 0);
40135        let extractc = -extractc;
40136        let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40137        simd_insert!(a, 0, fmsub)
40138    }
40139}
40140
40141/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40142///
40143/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40144/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40145/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40146/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40147/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40148/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40149///
40150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
40151#[inline]
40152#[target_feature(enable = "avx512f")]
40153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40154#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40155#[rustc_legacy_const_generics(4)]
40156pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
40157    a: __m128d,
40158    k: __mmask8,
40159    b: __m128d,
40160    c: __m128d,
40161) -> __m128d {
40162    unsafe {
40163        static_assert_rounding!(ROUNDING);
40164        let mut fmsub: f64 = simd_extract!(a, 0);
40165        if (k & 0b00000001) != 0 {
40166            let extractb: f64 = simd_extract!(b, 0);
40167            let extractc: f64 = simd_extract!(c, 0);
40168            let extractc = -extractc;
40169            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
40170        }
40171        simd_insert!(a, 0, fmsub)
40172    }
40173}
40174
40175/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40176///
40177/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40178/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40179/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40180/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40181/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40182/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40183///
40184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
40185#[inline]
40186#[target_feature(enable = "avx512f")]
40187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40188#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40189#[rustc_legacy_const_generics(4)]
40190pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
40191    k: __mmask8,
40192    a: __m128d,
40193    b: __m128d,
40194    c: __m128d,
40195) -> __m128d {
40196    unsafe {
40197        static_assert_rounding!(ROUNDING);
40198        let mut fmsub: f64 = 0.;
40199        if (k & 0b00000001) != 0 {
40200            let extracta: f64 = simd_extract!(a, 0);
40201            let extractb: f64 = simd_extract!(b, 0);
40202            let extractc: f64 = simd_extract!(c, 0);
40203            let extractc = -extractc;
40204            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40205        }
40206        simd_insert!(a, 0, fmsub)
40207    }
40208}
40209
40210/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40211///
40212/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40213/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40214/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40215/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40216/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40218///
40219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
40220#[inline]
40221#[target_feature(enable = "avx512f")]
40222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40223#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40224#[rustc_legacy_const_generics(4)]
40225pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
40226    a: __m128d,
40227    b: __m128d,
40228    c: __m128d,
40229    k: __mmask8,
40230) -> __m128d {
40231    unsafe {
40232        static_assert_rounding!(ROUNDING);
40233        let mut fmsub: f64 = simd_extract!(c, 0);
40234        if (k & 0b00000001) != 0 {
40235            let extracta: f64 = simd_extract!(a, 0);
40236            let extractb: f64 = simd_extract!(b, 0);
40237            let extractc = -fmsub;
40238            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40239        }
40240        simd_insert!(c, 0, fmsub)
40241    }
40242}
40243
40244/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40245///
40246/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40247/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40248/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40249/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40250/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40251/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40252///
40253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
40254#[inline]
40255#[target_feature(enable = "avx512f")]
40256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40257#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40258#[rustc_legacy_const_generics(3)]
40259pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40260    unsafe {
40261        static_assert_rounding!(ROUNDING);
40262        let extracta: f32 = simd_extract!(a, 0);
40263        let extracta = -extracta;
40264        let extractb: f32 = simd_extract!(b, 0);
40265        let extractc: f32 = simd_extract!(c, 0);
40266        let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40267        simd_insert!(a, 0, fnmadd)
40268    }
40269}
40270
40271/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40272///
40273/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40274/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40275/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40276/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40277/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40278/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40279///
40280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
40281#[inline]
40282#[target_feature(enable = "avx512f")]
40283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40284#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40285#[rustc_legacy_const_generics(4)]
40286pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
40287    a: __m128,
40288    k: __mmask8,
40289    b: __m128,
40290    c: __m128,
40291) -> __m128 {
40292    unsafe {
40293        static_assert_rounding!(ROUNDING);
40294        let mut fnmadd: f32 = simd_extract!(a, 0);
40295        if (k & 0b00000001) != 0 {
40296            let extracta = -fnmadd;
40297            let extractb: f32 = simd_extract!(b, 0);
40298            let extractc: f32 = simd_extract!(c, 0);
40299            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40300        }
40301        simd_insert!(a, 0, fnmadd)
40302    }
40303}
40304
40305/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40306///
40307/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40308/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40309/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40310/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40311/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40312/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40313///
40314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
40315#[inline]
40316#[target_feature(enable = "avx512f")]
40317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40318#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40319#[rustc_legacy_const_generics(4)]
40320pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
40321    k: __mmask8,
40322    a: __m128,
40323    b: __m128,
40324    c: __m128,
40325) -> __m128 {
40326    unsafe {
40327        static_assert_rounding!(ROUNDING);
40328        let mut fnmadd: f32 = 0.;
40329        if (k & 0b00000001) != 0 {
40330            let extracta: f32 = simd_extract!(a, 0);
40331            let extracta = -extracta;
40332            let extractb: f32 = simd_extract!(b, 0);
40333            let extractc: f32 = simd_extract!(c, 0);
40334            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40335        }
40336        simd_insert!(a, 0, fnmadd)
40337    }
40338}
40339
40340/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40341///
40342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40348///
40349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
40350#[inline]
40351#[target_feature(enable = "avx512f")]
40352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40353#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40354#[rustc_legacy_const_generics(4)]
40355pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
40356    a: __m128,
40357    b: __m128,
40358    c: __m128,
40359    k: __mmask8,
40360) -> __m128 {
40361    unsafe {
40362        static_assert_rounding!(ROUNDING);
40363        let mut fnmadd: f32 = simd_extract!(c, 0);
40364        if (k & 0b00000001) != 0 {
40365            let extracta: f32 = simd_extract!(a, 0);
40366            let extracta = -extracta;
40367            let extractb: f32 = simd_extract!(b, 0);
40368            fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
40369        }
40370        simd_insert!(c, 0, fnmadd)
40371    }
40372}
40373
40374/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40375///
40376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40382///
40383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
40384#[inline]
40385#[target_feature(enable = "avx512f")]
40386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40387#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40388#[rustc_legacy_const_generics(3)]
40389pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40390    unsafe {
40391        static_assert_rounding!(ROUNDING);
40392        let extracta: f64 = simd_extract!(a, 0);
40393        let extracta = -extracta;
40394        let extractb: f64 = simd_extract!(b, 0);
40395        let extractc: f64 = simd_extract!(c, 0);
40396        let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40397        simd_insert!(a, 0, fnmadd)
40398    }
40399}
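
// Illustrative sketch (helper name and rounding constant are assumptions): the
// double-precision fnmadd computes -(a[0] * b[0]) + c[0] in lane 0 and copies
// the upper lane from `a`, here rounding toward negative infinity.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b, c)
}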
40400
40401/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40402///
40403/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40404/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40405/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40406/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40407/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40408/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40409///
40410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
40411#[inline]
40412#[target_feature(enable = "avx512f")]
40413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40414#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40415#[rustc_legacy_const_generics(4)]
40416pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
40417    a: __m128d,
40418    k: __mmask8,
40419    b: __m128d,
40420    c: __m128d,
40421) -> __m128d {
40422    unsafe {
40423        static_assert_rounding!(ROUNDING);
40424        let mut fnmadd: f64 = simd_extract!(a, 0);
40425        if (k & 0b00000001) != 0 {
40426            let extracta = -fnmadd;
40427            let extractb: f64 = simd_extract!(b, 0);
40428            let extractc: f64 = simd_extract!(c, 0);
40429            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40430        }
40431        simd_insert!(a, 0, fnmadd)
40432    }
40433}
40434
40435/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40436///
40437/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40438/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40439/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40440/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40441/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40442/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40443///
40444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
40445#[inline]
40446#[target_feature(enable = "avx512f")]
40447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40448#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40449#[rustc_legacy_const_generics(4)]
40450pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
40451    k: __mmask8,
40452    a: __m128d,
40453    b: __m128d,
40454    c: __m128d,
40455) -> __m128d {
40456    unsafe {
40457        static_assert_rounding!(ROUNDING);
40458        let mut fnmadd: f64 = 0.;
40459        if (k & 0b00000001) != 0 {
40460            let extracta: f64 = simd_extract!(a, 0);
40461            let extracta = -extracta;
40462            let extractb: f64 = simd_extract!(b, 0);
40463            let extractc: f64 = simd_extract!(c, 0);
40464            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40465        }
40466        simd_insert!(a, 0, fnmadd)
40467    }
40468}
40469
40470/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40471///
40472/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40473/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40474/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40475/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40476/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40477/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40478///
40479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
40480#[inline]
40481#[target_feature(enable = "avx512f")]
40482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40483#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40484#[rustc_legacy_const_generics(4)]
40485pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
40486    a: __m128d,
40487    b: __m128d,
40488    c: __m128d,
40489    k: __mmask8,
40490) -> __m128d {
40491    unsafe {
40492        static_assert_rounding!(ROUNDING);
40493        let mut fnmadd: f64 = simd_extract!(c, 0);
40494        if (k & 0b00000001) != 0 {
40495            let extracta: f64 = simd_extract!(a, 0);
40496            let extracta = -extracta;
40497            let extractb: f64 = simd_extract!(b, 0);
40498            fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
40499        }
40500        simd_insert!(c, 0, fnmadd)
40501    }
40502}
40503
40504/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40505///
40506/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40507/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40508/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40509/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40510/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40511/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40512///
40513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
40514#[inline]
40515#[target_feature(enable = "avx512f")]
40516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40517#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40518#[rustc_legacy_const_generics(3)]
40519pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40520    unsafe {
40521        static_assert_rounding!(ROUNDING);
40522        let extracta: f32 = simd_extract!(a, 0);
40523        let extracta = -extracta;
40524        let extractb: f32 = simd_extract!(b, 0);
40525        let extractc: f32 = simd_extract!(c, 0);
40526        let extractc = -extractc;
40527        let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40528        simd_insert!(a, 0, fnmsub)
40529    }
40530}
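
// Sketch only (assumed helper): fnmsub negates both the product and the addend,
// so lane 0 becomes -(a[0] * b[0]) - c[0]; the rounding mode chosen here (round
// up, exceptions suppressed) is an arbitrary example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fnmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
    _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b, c)
}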
40531
40532/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40533///
40534/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40535/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40536/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40537/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40538/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40539/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40540///
40541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
40542#[inline]
40543#[target_feature(enable = "avx512f")]
40544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40545#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40546#[rustc_legacy_const_generics(4)]
40547pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
40548    a: __m128,
40549    k: __mmask8,
40550    b: __m128,
40551    c: __m128,
40552) -> __m128 {
40553    unsafe {
40554        static_assert_rounding!(ROUNDING);
40555        let mut fnmsub: f32 = simd_extract!(a, 0);
40556        if (k & 0b00000001) != 0 {
40557            let extracta = -fnmsub;
40558            let extractb: f32 = simd_extract!(b, 0);
40559            let extractc: f32 = simd_extract!(c, 0);
40560            let extractc = -extractc;
40561            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40562        }
40563        simd_insert!(a, 0, fnmsub)
40564    }
40565}
40566
40567/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40568///
40569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40575///
40576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
40577#[inline]
40578#[target_feature(enable = "avx512f")]
40579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40580#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40581#[rustc_legacy_const_generics(4)]
40582pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
40583    k: __mmask8,
40584    a: __m128,
40585    b: __m128,
40586    c: __m128,
40587) -> __m128 {
40588    unsafe {
40589        static_assert_rounding!(ROUNDING);
40590        let mut fnmsub: f32 = 0.;
40591        if (k & 0b00000001) != 0 {
40592            let extracta: f32 = simd_extract!(a, 0);
40593            let extracta = -extracta;
40594            let extractb: f32 = simd_extract!(b, 0);
40595            let extractc: f32 = simd_extract!(c, 0);
40596            let extractc = -extractc;
40597            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40598        }
40599        simd_insert!(a, 0, fnmsub)
40600    }
40601}
40602
40603/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40604///
40605/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40606/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40607/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40608/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40609/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40610/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40611///
40612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
40613#[inline]
40614#[target_feature(enable = "avx512f")]
40615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40616#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40617#[rustc_legacy_const_generics(4)]
40618pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
40619    a: __m128,
40620    b: __m128,
40621    c: __m128,
40622    k: __mmask8,
40623) -> __m128 {
40624    unsafe {
40625        static_assert_rounding!(ROUNDING);
40626        let mut fnmsub: f32 = simd_extract!(c, 0);
40627        if (k & 0b00000001) != 0 {
40628            let extracta: f32 = simd_extract!(a, 0);
40629            let extracta = -extracta;
40630            let extractb: f32 = simd_extract!(b, 0);
40631            let extractc = -fnmsub;
40632            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40633        }
40634        simd_insert!(c, 0, fnmsub)
40635    }
40636}
40637
40638/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40639///
40640/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40641/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40642/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40643/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40644/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40645/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40646///
40647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
40648#[inline]
40649#[target_feature(enable = "avx512f")]
40650#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40651#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40652#[rustc_legacy_const_generics(3)]
40653pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40654    unsafe {
40655        static_assert_rounding!(ROUNDING);
40656        let extracta: f64 = simd_extract!(a, 0);
40657        let extracta = -extracta;
40658        let extractb: f64 = simd_extract!(b, 0);
40659        let extractc: f64 = simd_extract!(c, 0);
40660        let extractc = -extractc;
40661        let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40662        simd_insert!(a, 0, fnmsub)
40663    }
40664}
40665
40666/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40667///
40668/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40669/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40670/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40671/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40672/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40673/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40674///
40675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40676#[inline]
40677#[target_feature(enable = "avx512f")]
40678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40679#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40680#[rustc_legacy_const_generics(4)]
40681pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40682    a: __m128d,
40683    k: __mmask8,
40684    b: __m128d,
40685    c: __m128d,
40686) -> __m128d {
40687    unsafe {
40688        static_assert_rounding!(ROUNDING);
40689        let mut fnmsub: f64 = simd_extract!(a, 0);
40690        if (k & 0b00000001) != 0 {
40691            let extracta = -fnmsub;
40692            let extractb: f64 = simd_extract!(b, 0);
40693            let extractc: f64 = simd_extract!(c, 0);
40694            let extractc = -extractc;
40695            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40696        }
40697        simd_insert!(a, 0, fnmsub)
40698    }
40699}
40700
40701/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40702///
40703/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40704/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40705/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40706/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40707/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40708/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40709///
40710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
40711#[inline]
40712#[target_feature(enable = "avx512f")]
40713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40714#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40715#[rustc_legacy_const_generics(4)]
40716pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
40717    k: __mmask8,
40718    a: __m128d,
40719    b: __m128d,
40720    c: __m128d,
40721) -> __m128d {
40722    unsafe {
40723        static_assert_rounding!(ROUNDING);
40724        let mut fnmsub: f64 = 0.;
40725        if (k & 0b00000001) != 0 {
40726            let extracta: f64 = simd_extract!(a, 0);
40727            let extracta = -extracta;
40728            let extractb: f64 = simd_extract!(b, 0);
40729            let extractc: f64 = simd_extract!(c, 0);
40730            let extractc = -extractc;
40731            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40732        }
40733        simd_insert!(a, 0, fnmsub)
40734    }
40735}
40736
40737/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40738///
40739/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40740/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40741/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40742/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40743/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40744/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40745///
40746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
40747#[inline]
40748#[target_feature(enable = "avx512f")]
40749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40750#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40751#[rustc_legacy_const_generics(4)]
40752pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
40753    a: __m128d,
40754    b: __m128d,
40755    c: __m128d,
40756    k: __mmask8,
40757) -> __m128d {
40758    unsafe {
40759        static_assert_rounding!(ROUNDING);
40760        let mut fnmsub: f64 = simd_extract!(c, 0);
40761        if (k & 0b00000001) != 0 {
40762            let extracta: f64 = simd_extract!(a, 0);
40763            let extracta = -extracta;
40764            let extractb: f64 = simd_extract!(b, 0);
40765            let extractc = -fnmsub;
40766            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40767        }
40768        simd_insert!(c, 0, fnmsub)
40769    }
40770}
40771
40772/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40773///
40774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
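///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime and the vfixupimm
/// token table from Intel's SDM (the nibble `0b1010` selects `+1.0`); operand values are
/// illustrative only:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let a = _mm_set_ss(5.0);
///         let b = _mm_set_ss(0.0);
///         // The ZERO class reads the nibble at bits 11:8 of `c`; 0b1010 there means "+1.0".
///         let c = _mm_set1_epi32(0x0000_0A00);
///         // SAFETY: AVX-512F support was verified above.
///         let r = unsafe { _mm_fixupimm_ss::<0>(a, b, c) };
///         assert_eq!(_mm_cvtss_f32(r), 1.0);
///     }
/// }
/// ```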
40775#[inline]
40776#[target_feature(enable = "avx512f")]
40777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40778#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40779#[rustc_legacy_const_generics(3)]
40780pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
40781    unsafe {
40782        static_assert_uimm_bits!(IMM8, 8);
40783        let a = a.as_f32x4();
40784        let b = b.as_f32x4();
40785        let c = c.as_i32x4();
40786        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40787        let fixupimm: f32 = simd_extract!(r, 0);
40788        let r = simd_insert!(a, 0, fixupimm);
40789        transmute(r)
40790    }
40791}
40792
40793/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40794///
40795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
40796#[inline]
40797#[target_feature(enable = "avx512f")]
40798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40799#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40800#[rustc_legacy_const_generics(4)]
40801pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
40802    a: __m128,
40803    k: __mmask8,
40804    b: __m128,
40805    c: __m128i,
40806) -> __m128 {
40807    unsafe {
40808        static_assert_uimm_bits!(IMM8, 8);
40809        let a = a.as_f32x4();
40810        let b = b.as_f32x4();
40811        let c = c.as_i32x4();
40812        let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40813        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40814        let r = simd_insert!(a, 0, fixupimm);
40815        transmute(r)
40816    }
40817}
40818
40819/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40820///
40821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
40822#[inline]
40823#[target_feature(enable = "avx512f")]
40824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40825#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40826#[rustc_legacy_const_generics(4)]
40827pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
40828    k: __mmask8,
40829    a: __m128,
40830    b: __m128,
40831    c: __m128i,
40832) -> __m128 {
40833    unsafe {
40834        static_assert_uimm_bits!(IMM8, 8);
40835        let a = a.as_f32x4();
40836        let b = b.as_f32x4();
40837        let c = c.as_i32x4();
40838        let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40839        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40840        let r = simd_insert!(a, 0, fixupimm);
40841        transmute(r)
40842    }
40843}
40844
40845/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40846///
40847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
40848#[inline]
40849#[target_feature(enable = "avx512f")]
40850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40851#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40852#[rustc_legacy_const_generics(3)]
40853pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
40854    unsafe {
40855        static_assert_uimm_bits!(IMM8, 8);
40856        let a = a.as_f64x2();
40857        let b = b.as_f64x2();
40858        let c = c.as_i64x2();
40859        let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40860        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40861        let r = simd_insert!(a, 0, fixupimm);
40862        transmute(r)
40863    }
40864}
40865
40866/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40867///
40868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
40869#[inline]
40870#[target_feature(enable = "avx512f")]
40871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40872#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40873#[rustc_legacy_const_generics(4)]
40874pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
40875    a: __m128d,
40876    k: __mmask8,
40877    b: __m128d,
40878    c: __m128i,
40879) -> __m128d {
40880    unsafe {
40881        static_assert_uimm_bits!(IMM8, 8);
40882        let a = a.as_f64x2();
40883        let b = b.as_f64x2();
40884        let c = c.as_i64x2();
40885        let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40886        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40887        let r = simd_insert!(a, 0, fixupimm);
40888        transmute(r)
40889    }
40890}
40891
40892/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40893///
40894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
40895#[inline]
40896#[target_feature(enable = "avx512f")]
40897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40898#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40899#[rustc_legacy_const_generics(4)]
40900pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
40901    k: __mmask8,
40902    a: __m128d,
40903    b: __m128d,
40904    c: __m128i,
40905) -> __m128d {
40906    unsafe {
40907        static_assert_uimm_bits!(IMM8, 8);
40908        let a = a.as_f64x2();
40909        let b = b.as_f64x2();
40910        let c = c.as_i64x2();
40911        let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40912        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40913        let r = simd_insert!(a, 0, fixupimm);
40914        transmute(r)
40915    }
40916}
40917
40918/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40919/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40920///
40921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
40922#[inline]
40923#[target_feature(enable = "avx512f")]
40924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40925#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40926#[rustc_legacy_const_generics(3, 4)]
40927pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40928    a: __m128,
40929    b: __m128,
40930    c: __m128i,
40931) -> __m128 {
40932    unsafe {
40933        static_assert_uimm_bits!(IMM8, 8);
40934        static_assert_mantissas_sae!(SAE);
40935        let a = a.as_f32x4();
40936        let b = b.as_f32x4();
40937        let c = c.as_i32x4();
40938        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
40939        let fixupimm: f32 = simd_extract!(r, 0);
40940        let r = simd_insert!(a, 0, fixupimm);
40941        transmute(r)
40942    }
40943}
40944
40945/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40946/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40947///
40948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
40949#[inline]
40950#[target_feature(enable = "avx512f")]
40951#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40952#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40953#[rustc_legacy_const_generics(4, 5)]
40954pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40955    a: __m128,
40956    k: __mmask8,
40957    b: __m128,
40958    c: __m128i,
40959) -> __m128 {
40960    unsafe {
40961        static_assert_uimm_bits!(IMM8, 8);
40962        static_assert_mantissas_sae!(SAE);
40963        let a = a.as_f32x4();
40964        let b = b.as_f32x4();
40965        let c = c.as_i32x4();
40966        let r = vfixupimmss(a, b, c, IMM8, k, SAE);
40967        let fixupimm: f32 = simd_extract!(r, 0);
40968        let r = simd_insert!(a, 0, fixupimm);
40969        transmute(r)
40970    }
40971}
40972
40973/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40974/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40975///
40976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
40977#[inline]
40978#[target_feature(enable = "avx512f")]
40979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40980#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40981#[rustc_legacy_const_generics(4, 5)]
40982pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40983    k: __mmask8,
40984    a: __m128,
40985    b: __m128,
40986    c: __m128i,
40987) -> __m128 {
40988    unsafe {
40989        static_assert_uimm_bits!(IMM8, 8);
40990        static_assert_mantissas_sae!(SAE);
40991        let a = a.as_f32x4();
40992        let b = b.as_f32x4();
40993        let c = c.as_i32x4();
40994        let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
40995        let fixupimm: f32 = simd_extract!(r, 0);
40996        let r = simd_insert!(a, 0, fixupimm);
40997        transmute(r)
40998    }
40999}
41000
41001/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41002/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41003///
41004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
41005#[inline]
41006#[target_feature(enable = "avx512f")]
41007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41008#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41009#[rustc_legacy_const_generics(3, 4)]
41010pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41011    a: __m128d,
41012    b: __m128d,
41013    c: __m128i,
41014) -> __m128d {
41015    unsafe {
41016        static_assert_uimm_bits!(IMM8, 8);
41017        static_assert_mantissas_sae!(SAE);
41018        let a = a.as_f64x2();
41019        let b = b.as_f64x2();
41020        let c = c.as_i64x2();
41021        let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
41022        let fixupimm: f64 = simd_extract!(r, 0);
41023        let r = simd_insert!(a, 0, fixupimm);
41024        transmute(r)
41025    }
41026}
41027
41028/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41029/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41030///
41031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
41032#[inline]
41033#[target_feature(enable = "avx512f")]
41034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41035#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41036#[rustc_legacy_const_generics(4, 5)]
41037pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41038    a: __m128d,
41039    k: __mmask8,
41040    b: __m128d,
41041    c: __m128i,
41042) -> __m128d {
41043    unsafe {
41044        static_assert_uimm_bits!(IMM8, 8);
41045        static_assert_mantissas_sae!(SAE);
41046        let a = a.as_f64x2();
41047        let b = b.as_f64x2();
41048        let c = c.as_i64x2();
41049        let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
41050        let fixupimm: f64 = simd_extract!(r, 0);
41051        let r = simd_insert!(a, 0, fixupimm);
41052        transmute(r)
41053    }
41054}
41055
41056/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41057/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41058///
41059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
41060#[inline]
41061#[target_feature(enable = "avx512f")]
41062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41063#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41064#[rustc_legacy_const_generics(4, 5)]
41065pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41066    k: __mmask8,
41067    a: __m128d,
41068    b: __m128d,
41069    c: __m128i,
41070) -> __m128d {
41071    unsafe {
41072        static_assert_uimm_bits!(IMM8, 8);
41073        static_assert_mantissas_sae!(SAE);
41074        let a = a.as_f64x2();
41075        let b = b.as_f64x2();
41076        let c = c.as_i64x2();
41077        let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
41078        let fixupimm: f64 = simd_extract!(r, 0);
41079        let r = simd_insert!(a, 0, fixupimm);
41080        transmute(r)
41081    }
41082}
41083
41084/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41085///
41086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
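///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime (operand values are
/// illustrative only):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let src = _mm_set_sd(-1.0);
///         let a = _mm_set_pd(7.0, 0.0);
///         let b = _mm_set_ss(2.5);
///         // SAFETY: AVX-512F support was verified above.
///         let merged = unsafe { _mm_mask_cvtss_sd(src, 0b1, a, b) };
///         let untouched = unsafe { _mm_mask_cvtss_sd(src, 0b0, a, b) };
///         // Mask bit set: lower = `b` widened to f64; mask bit clear: lower = `src`.
///         assert_eq!(_mm_cvtsd_f64(merged), 2.5);
///         assert_eq!(_mm_cvtsd_f64(untouched), -1.0);
///     }
/// }
/// ```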
41087#[inline]
41088#[target_feature(enable = "avx512f")]
41089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41090#[cfg_attr(test, assert_instr(vcvtss2sd))]
41091pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41092    unsafe {
41093        transmute(vcvtss2sd(
41094            a.as_f64x2(),
41095            b.as_f32x4(),
41096            src.as_f64x2(),
41097            k,
41098            _MM_FROUND_CUR_DIRECTION,
41099        ))
41100    }
41101}
41102
41103/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41104///
41105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
41106#[inline]
41107#[target_feature(enable = "avx512f")]
41108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41109#[cfg_attr(test, assert_instr(vcvtss2sd))]
41110pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41111    unsafe {
41112        transmute(vcvtss2sd(
41113            a.as_f64x2(),
41114            b.as_f32x4(),
41115            f64x2::ZERO,
41116            k,
41117            _MM_FROUND_CUR_DIRECTION,
41118        ))
41119    }
41120}
41121
41122/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41123///
41124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
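///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime (operand values are
/// illustrative only):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let src = _mm_set_ss(-1.0);
///         let a = _mm_setzero_ps();
///         let b = _mm_set_sd(1.5);
///         // SAFETY: AVX-512F support was verified above.
///         let r = unsafe { _mm_mask_cvtsd_ss(src, 0b0, a, b) };
///         // Mask bit 0 is clear, so the lower element comes from `src`.
///         assert_eq!(_mm_cvtss_f32(r), -1.0);
///     }
/// }
/// ```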
41125#[inline]
41126#[target_feature(enable = "avx512f")]
41127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41128#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41129pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41130    unsafe {
41131        transmute(vcvtsd2ss(
41132            a.as_f32x4(),
41133            b.as_f64x2(),
41134            src.as_f32x4(),
41135            k,
41136            _MM_FROUND_CUR_DIRECTION,
41137        ))
41138    }
41139}
41140
41141/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41142///
41143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
41144#[inline]
41145#[target_feature(enable = "avx512f")]
41146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41147#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41148pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41149    unsafe {
41150        transmute(vcvtsd2ss(
41151            a.as_f32x4(),
41152            b.as_f64x2(),
41153            f32x4::ZERO,
41154            k,
41155            _MM_FROUND_CUR_DIRECTION,
41156        ))
41157    }
41158}
41159
41160/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41162///
41163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
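///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime (operand values are
/// illustrative only):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let a = _mm_set_pd(10.0, 0.0);
///         let b = _mm_set_ss(0.25);
///         // SAFETY: AVX-512F support was verified above.
///         let r = unsafe { _mm_cvt_roundss_sd::<_MM_FROUND_NO_EXC>(a, b) };
///         // Lower element: 0.25 widened to f64; upper element copied from `a`.
///         assert_eq!(_mm_cvtsd_f64(r), 0.25);
///     }
/// }
/// ```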
41164#[inline]
41165#[target_feature(enable = "avx512f")]
41166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41167#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41168#[rustc_legacy_const_generics(2)]
41169pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
41170    unsafe {
41171        static_assert_sae!(SAE);
41172        let a = a.as_f64x2();
41173        let b = b.as_f32x4();
41174        let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
41175        transmute(r)
41176    }
41177}
41178
41179/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41180/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41181///
41182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
41183#[inline]
41184#[target_feature(enable = "avx512f")]
41185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41186#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41187#[rustc_legacy_const_generics(4)]
41188pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
41189    src: __m128d,
41190    k: __mmask8,
41191    a: __m128d,
41192    b: __m128,
41193) -> __m128d {
41194    unsafe {
41195        static_assert_sae!(SAE);
41196        let a = a.as_f64x2();
41197        let b = b.as_f32x4();
41198        let src = src.as_f64x2();
41199        let r = vcvtss2sd(a, b, src, k, SAE);
41200        transmute(r)
41201    }
41202}
41203
41204/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41205/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41206///
41207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
41208#[inline]
41209#[target_feature(enable = "avx512f")]
41210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41211#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41212#[rustc_legacy_const_generics(3)]
41213pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41214    unsafe {
41215        static_assert_sae!(SAE);
41216        let a = a.as_f64x2();
41217        let b = b.as_f32x4();
41218        let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
41219        transmute(r)
41220    }
41221}
41222
41223/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41224/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41225/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41226/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41227/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41228/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41229/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41230///
41231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
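///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime; it narrows a value
/// that is not exactly representable as `f32` under two different rounding directions:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let a = _mm_setzero_ps();
///         let b = _mm_set_sd(std::f64::consts::PI);
///         // SAFETY: AVX-512F support was verified above.
///         let down = unsafe { _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b) };
///         let up = unsafe { _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b) };
///         // Rounding toward -infinity never exceeds rounding toward +infinity.
///         assert!(_mm_cvtss_f32(down) < _mm_cvtss_f32(up));
///     }
/// }
/// ```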
41232#[inline]
41233#[target_feature(enable = "avx512f")]
41234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41235#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41236#[rustc_legacy_const_generics(2)]
41237pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
41238    unsafe {
41239        static_assert_rounding!(ROUNDING);
41240        let a = a.as_f32x4();
41241        let b = b.as_f64x2();
41242        let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
41243        transmute(r)
41244    }
41245}
41246
41247/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41248/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41249/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41250/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41251/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41252/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41253/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41254///
41255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
41256#[inline]
41257#[target_feature(enable = "avx512f")]
41258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41259#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41260#[rustc_legacy_const_generics(4)]
41261pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
41262    src: __m128,
41263    k: __mmask8,
41264    a: __m128,
41265    b: __m128d,
41266) -> __m128 {
41267    unsafe {
41268        static_assert_rounding!(ROUNDING);
41269        let a = a.as_f32x4();
41270        let b = b.as_f64x2();
41271        let src = src.as_f32x4();
41272        let r = vcvtsd2ss(a, b, src, k, ROUNDING);
41273        transmute(r)
41274    }
41275}
41276
41277/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41278/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41279/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41280/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41281/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41282/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41283/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41284///
41285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
41286#[inline]
41287#[target_feature(enable = "avx512f")]
41288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41289#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41290#[rustc_legacy_const_generics(3)]
41291pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41292    unsafe {
41293        static_assert_rounding!(ROUNDING);
41294        let a = a.as_f32x4();
41295        let b = b.as_f64x2();
41296        let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
41297        transmute(r)
41298    }
41299}
41300
41301/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41302/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41303/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41304/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41305/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41306/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41307/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41308///
41309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
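///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime; it converts `2.5`
/// under two different rounding directions:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let a = _mm_set_ss(2.5);
///         // SAFETY: AVX-512F support was verified above.
///         let up = unsafe { _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a) };
///         let nearest = unsafe { _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a) };
///         assert_eq!(up, 3);
///         assert_eq!(nearest, 2); // ties round to even
///     }
/// }
/// ```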
41310#[inline]
41311#[target_feature(enable = "avx512f")]
41312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41313#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41314#[rustc_legacy_const_generics(1)]
41315pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
41316    unsafe {
41317        static_assert_rounding!(ROUNDING);
41318        let a = a.as_f32x4();
41319        vcvtss2si(a, ROUNDING)
41320    }
41321}
41322
41323/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41324/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41325/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41326/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41327/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41328/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41329/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41330///
41331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
41332#[inline]
41333#[target_feature(enable = "avx512f")]
41334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41335#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41336#[rustc_legacy_const_generics(1)]
41337pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
41338    unsafe {
41339        static_assert_rounding!(ROUNDING);
41340        let a = a.as_f32x4();
41341        vcvtss2si(a, ROUNDING)
41342    }
41343}
41344
41345/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41346/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41347/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41348/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41349/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41350/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41351/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41352///
41353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
41354#[inline]
41355#[target_feature(enable = "avx512f")]
41356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41357#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
41358#[rustc_legacy_const_generics(1)]
41359pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
41360    unsafe {
41361        static_assert_rounding!(ROUNDING);
41362        let a = a.as_f32x4();
41363        vcvtss2usi(a, ROUNDING)
41364    }
41365}
41366
41367/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41368///
41369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
41370#[inline]
41371#[target_feature(enable = "avx512f")]
41372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41373#[cfg_attr(test, assert_instr(vcvtss2si))]
41374pub fn _mm_cvtss_i32(a: __m128) -> i32 {
41375    unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41376}
41377
41378/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41379///
41380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
41381#[inline]
41382#[target_feature(enable = "avx512f")]
41383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41384#[cfg_attr(test, assert_instr(vcvtss2usi))]
41385pub fn _mm_cvtss_u32(a: __m128) -> u32 {
41386    unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41387}
41388
41389/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41390/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41391/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41392/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41393/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41394/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41395/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41396///
41397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
41398#[inline]
41399#[target_feature(enable = "avx512f")]
41400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41401#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41402#[rustc_legacy_const_generics(1)]
41403pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
41404    unsafe {
41405        static_assert_rounding!(ROUNDING);
41406        let a = a.as_f64x2();
41407        vcvtsd2si(a, ROUNDING)
41408    }
41409}
41410
41411/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41412/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41413/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41414/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41415/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41416/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41417/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41418///
41419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
41420#[inline]
41421#[target_feature(enable = "avx512f")]
41422#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41423#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41424#[rustc_legacy_const_generics(1)]
41425pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
41426    unsafe {
41427        static_assert_rounding!(ROUNDING);
41428        let a = a.as_f64x2();
41429        vcvtsd2si(a, ROUNDING)
41430    }
41431}
41432
41433/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41434/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41435/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41436/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41437/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41438/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41439/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41440///
41441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
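///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime; the input exceeds
/// `i32::MAX` but fits in a `u32`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let a = _mm_set_sd(3_000_000_000.7);
///         // SAFETY: AVX-512F support was verified above.
///         let r = unsafe { _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a) };
///         assert_eq!(r, 3_000_000_000);
///     }
/// }
/// ```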
41442#[inline]
41443#[target_feature(enable = "avx512f")]
41444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41445#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
41446#[rustc_legacy_const_generics(1)]
41447pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
41448    unsafe {
41449        static_assert_rounding!(ROUNDING);
41450        let a = a.as_f64x2();
41451        vcvtsd2usi(a, ROUNDING)
41452    }
41453}
41454
41455/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41456///
41457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
41458#[inline]
41459#[target_feature(enable = "avx512f")]
41460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41461#[cfg_attr(test, assert_instr(vcvtsd2si))]
41462pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
41463    unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41464}
41465
41466/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41467///
41468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
41469#[inline]
41470#[target_feature(enable = "avx512f")]
41471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41472#[cfg_attr(test, assert_instr(vcvtsd2usi))]
41473pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
41474    unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41475}
41476
41477/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41478///
41479/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41480/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41481/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41482/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41483/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41484/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41485///
41486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
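///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime; `2^24 + 1` is not
/// exactly representable as `f32`, so the rounding direction is observable:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let a = _mm_set_ss(0.0);
///         // SAFETY: AVX-512F support was verified above.
///         let down = unsafe { _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 16_777_217) };
///         let up = unsafe { _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, 16_777_217) };
///         assert_eq!(_mm_cvtss_f32(down), 16_777_216.0);
///         assert_eq!(_mm_cvtss_f32(up), 16_777_218.0);
///     }
/// }
/// ```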
41487#[inline]
41488#[target_feature(enable = "avx512f")]
41489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41490#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41491#[rustc_legacy_const_generics(2)]
41492pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41493    unsafe {
41494        static_assert_rounding!(ROUNDING);
41495        let a = a.as_f32x4();
41496        let r = vcvtsi2ss(a, b, ROUNDING);
41497        transmute(r)
41498    }
41499}
41500
41501/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41502///
41503/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41509///
41510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
41511#[inline]
41512#[target_feature(enable = "avx512f")]
41513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41514#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41515#[rustc_legacy_const_generics(2)]
41516pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41517    unsafe {
41518        static_assert_rounding!(ROUNDING);
41519        let a = a.as_f32x4();
41520        let r = vcvtsi2ss(a, b, ROUNDING);
41521        transmute(r)
41522    }
41523}
41524
41525/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41532///
41533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
41534#[inline]
41535#[target_feature(enable = "avx512f")]
41536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41537#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
41538#[rustc_legacy_const_generics(2)]
41539pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
41540    unsafe {
41541        static_assert_rounding!(ROUNDING);
41542        let a = a.as_f32x4();
41543        let r = vcvtusi2ss(a, b, ROUNDING);
41544        transmute(r)
41545    }
41546}
41547
41548/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41549///
41550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
41551#[inline]
41552#[target_feature(enable = "avx512f")]
41553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41554#[cfg_attr(test, assert_instr(vcvtsi2ss))]
41555pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
41556    unsafe {
41557        let b = b as f32;
41558        simd_insert!(a, 0, b)
41559    }
41560}
41561
41562/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41563///
41564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
41565#[inline]
41566#[target_feature(enable = "avx512f")]
41567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41568#[cfg_attr(test, assert_instr(vcvtsi2sd))]
41569pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
41570    unsafe {
41571        let b = b as f64;
41572        simd_insert!(a, 0, b)
41573    }
41574}
41575
41576/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41577/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41578///
41579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
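///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let a = _mm_set_ss(-2.7);
///         // SAFETY: AVX-512F support was verified above.
///         let r = unsafe { _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a) };
///         // Truncation always rounds toward zero, independent of MXCSR.RC.
///         assert_eq!(r, -2);
///     }
/// }
/// ```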
41580#[inline]
41581#[target_feature(enable = "avx512f")]
41582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41583#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41584#[rustc_legacy_const_generics(1)]
41585pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
41586    unsafe {
41587        static_assert_sae!(SAE);
41588        let a = a.as_f32x4();
41589        vcvttss2si(a, SAE)
41590    }
41591}
41592
41593/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41594/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41595///
41596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
41597#[inline]
41598#[target_feature(enable = "avx512f")]
41599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41600#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41601#[rustc_legacy_const_generics(1)]
41602pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
41603    unsafe {
41604        static_assert_sae!(SAE);
41605        let a = a.as_f32x4();
41606        vcvttss2si(a, SAE)
41607    }
41608}
41609
41610/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41612///
41613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
41614#[inline]
41615#[target_feature(enable = "avx512f")]
41616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41617#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
41618#[rustc_legacy_const_generics(1)]
41619pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
41620    unsafe {
41621        static_assert_sae!(SAE);
41622        let a = a.as_f32x4();
41623        vcvttss2usi(a, SAE)
41624    }
41625}
41626
41627/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41628///
41629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
41630#[inline]
41631#[target_feature(enable = "avx512f")]
41632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41633#[cfg_attr(test, assert_instr(vcvttss2si))]
41634pub fn _mm_cvttss_i32(a: __m128) -> i32 {
41635    unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41636}
41637
41638/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41639///
41640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
41641#[inline]
41642#[target_feature(enable = "avx512f")]
41643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41644#[cfg_attr(test, assert_instr(vcvttss2usi))]
41645pub fn _mm_cvttss_u32(a: __m128) -> u32 {
41646    unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41647}
41648
41649/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41650/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41651///
41652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
41653#[inline]
41654#[target_feature(enable = "avx512f")]
41655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41656#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41657#[rustc_legacy_const_generics(1)]
41658pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
41659    unsafe {
41660        static_assert_sae!(SAE);
41661        let a = a.as_f64x2();
41662        vcvttsd2si(a, SAE)
41663    }
41664}
41665
41666/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41667/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41668///
41669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
41670#[inline]
41671#[target_feature(enable = "avx512f")]
41672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41673#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41674#[rustc_legacy_const_generics(1)]
41675pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
41676    unsafe {
41677        static_assert_sae!(SAE);
41678        let a = a.as_f64x2();
41679        vcvttsd2si(a, SAE)
41680    }
41681}
41682
41683/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41684/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41685///
41686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
41687#[inline]
41688#[target_feature(enable = "avx512f")]
41689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41690#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
41691#[rustc_legacy_const_generics(1)]
41692pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
41693    unsafe {
41694        static_assert_sae!(SAE);
41695        let a = a.as_f64x2();
41696        vcvttsd2usi(a, SAE)
41697    }
41698}
41699
41700/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41701///
41702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
41703#[inline]
41704#[target_feature(enable = "avx512f")]
41705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41706#[cfg_attr(test, assert_instr(vcvttsd2si))]
41707pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
41708    unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41709}
41710
41711/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41712///
41713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
41714#[inline]
41715#[target_feature(enable = "avx512f")]
41716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41717#[cfg_attr(test, assert_instr(vcvttsd2usi))]
41718pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
41719    unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41720}
41721
41722/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41723///
41724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
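///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime; the input exceeds
/// `i32::MAX`, so an unsigned conversion is required:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let a = _mm_set_ss(0.0);
///         // SAFETY: AVX-512F support was verified above.
///         let r = unsafe { _mm_cvtu32_ss(a, 4_000_000_000) };
///         assert_eq!(_mm_cvtss_f32(r), 4_000_000_000.0);
///     }
/// }
/// ```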
41725#[inline]
41726#[target_feature(enable = "avx512f")]
41727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41728#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41729pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41730    unsafe {
41731        let b = b as f32;
41732        simd_insert!(a, 0, b)
41733    }
41734}
41735
41736/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41737///
41738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
41739#[inline]
41740#[target_feature(enable = "avx512f")]
41741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41742#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41743pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41744    unsafe {
41745        let b = b as f64;
41746        simd_insert!(a, 0, b)
41747    }
41748}
41749
41750/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41751/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41752///
41753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
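///
/// A minimal usage sketch, assuming AVX-512F is detected at runtime and using the
/// `_CMP_*` predicate constants for the comparison operand:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let a = _mm_set_ss(1.0);
///         let b = _mm_set_ss(2.0);
///         // SAFETY: AVX-512F support was verified above.
///         let lt = unsafe { _mm_comi_round_ss::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b) };
///         let gt = unsafe { _mm_comi_round_ss::<_CMP_GT_OS, _MM_FROUND_NO_EXC>(a, b) };
///         assert_eq!(lt, 1);
///         assert_eq!(gt, 0);
///     }
/// }
/// ```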
41754#[inline]
41755#[target_feature(enable = "avx512f")]
41756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41757#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss
41758#[rustc_legacy_const_generics(2, 3)]
41759pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41760    unsafe {
41761        static_assert_uimm_bits!(IMM5, 5);
41762        static_assert_mantissas_sae!(SAE);
41763        let a = a.as_f32x4();
41764        let b = b.as_f32x4();
41765        vcomiss(a, b, IMM5, SAE)
41766    }
41767}
41768
41769/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41771///
41772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
41773#[inline]
41774#[target_feature(enable = "avx512f")]
41775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41776#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd
41777#[rustc_legacy_const_generics(2, 3)]
41778pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41779    unsafe {
41780        static_assert_uimm_bits!(IMM5, 5);
41781        static_assert_mantissas_sae!(SAE);
41782        let a = a.as_f64x2();
41783        let b = b.as_f64x2();
41784        vcomisd(a, b, IMM5, SAE)
41785    }
41786}
41787
41788/// Equal
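///
/// A minimal usage sketch of the `_MM_CMPINT_*` predicates, assuming AVX-512F is
/// detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F support was verified above.
///         let m = unsafe {
///             let a = _mm512_set1_epi32(3);
///             let b = _mm512_set1_epi32(3);
///             _mm512_cmp_epi32_mask::<_MM_CMPINT_EQ>(a, b)
///         };
///         assert_eq!(m, 0xFFFF); // every lane compares equal
///     }
/// }
/// ```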
41789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41790pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41791/// Less-than
41792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41793pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41794/// Less-than-or-equal
41795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41796pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41797/// False
41798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41799pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41800/// Not-equal
41801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41802pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41803/// Not less-than
41804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41805pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41806/// Not less-than-or-equal
41807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41808pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41809/// True
41810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41811pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
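
// The `_MM_CMPINT_*` constants above are the 3-bit comparison predicates accepted by
// the integer compare-to-mask intrinsics. A hedged sketch of typical use (the helper
// name is hypothetical; it assumes the const-generic form of `_mm512_cmp_epi32_mask`):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn lanes_below_ten(a: __m512i) -> __mmask16 {
//         // Bit i of the mask is set when a[i] < 10 (signed comparison).
//         _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, _mm512_set1_epi32(10))
//     }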
41812
41813/// interval [1, 2)
41814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41815pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41816/// interval [0.5, 2)
41817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41818pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41819/// interval [0.5, 1)
41820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41821pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41822/// interval [0.75, 1.5)
41823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41824pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41825
41826/// sign = sign(SRC)
41827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41828pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41829/// sign = 0
41830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41831pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41832/// DEST = NaN if sign(SRC) = 1
41833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41834pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
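
// The `_MM_MANT_NORM_*` and `_MM_MANT_SIGN_*` constants above parameterise the
// `getmant` intrinsics, which return each element's mantissa normalised into the
// chosen interval with the chosen sign treatment. A hedged sketch (the helper name
// is hypothetical; it assumes the const-generic form of `_mm512_getmant_ps`):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn mantissas(a: __m512) -> __m512 {
//         // Each element becomes its mantissa in [1, 2), keeping the sign of the source.
//         _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a)
//     }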
41835
41836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41837pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41839pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41841pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41843pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41845pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41847pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41849pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41851pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41853pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41855pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41857pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41859pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41861pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41863pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41865pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41867pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41869pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41871pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41873pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41875pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41877pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41879pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41881pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41883pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41885pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41887pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41889pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41891pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41893pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41895pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41897pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41899pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41901pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41903pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41905pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41907pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41909pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41911pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41913pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41915pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41917pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41919pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41921pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41923pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41925pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41927pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41929pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41931pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41933pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41935pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41937pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41939pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41941pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41943pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41945pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41947pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41949pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41951pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41953pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41955pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41957pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41959pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41961pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41963pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41965pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41967pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41969pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41971pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41973pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41975pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41977pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41979pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41981pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
41982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41983pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
41984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41985pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
41986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41987pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
41988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41989pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
41990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41991pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
41992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41993pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
41994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41995pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
41996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41997pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
41998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41999pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
42000#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42001pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
42002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42003pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
42004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42005pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
42006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42007pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
42008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42009pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
42010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42011pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
42012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42013pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
42014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42015pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
42016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42017pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
42018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42019pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
42020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42021pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
42022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42023pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
42024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42025pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
42026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42027pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
42028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42029pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
42030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42031pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
42032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42033pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
42034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42035pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
42036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42037pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
42038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42039pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
42040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42041pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
42042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42043pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
42044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42045pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
42046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42047pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
42048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42049pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
42050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42051pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
42052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42053pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
42054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42055pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
42056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42057pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
42058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42059pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
42060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42061pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
42062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42063pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
42064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42065pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
42066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42067pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
42068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42069pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
42070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42071pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
42072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42073pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
42074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42075pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
42076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42077pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42079pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42081pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42083pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42085pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42087pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42089pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42091pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42093pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42095pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42097pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42099pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42101pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42103pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42105pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42107pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42109pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42111pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42113pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42115pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42117pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42119pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42121pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42123pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42125pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42127pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42129pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42131pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42133pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42134#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42135pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42137pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42139pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42141pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42143pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42145pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42147pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42149pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42151pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42153pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
42154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42155pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
42156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42157pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
42158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42159pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
42160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42161pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
42162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42163pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
42164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42165pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
42166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42167pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
42168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42169pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
42170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42171pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
42172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42173pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
42174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42175pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
42176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42177pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
42178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42179pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
42180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42181pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
42182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42183pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
42184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42185pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
42186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42187pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
42188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42189pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
42190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42191pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
42192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42193pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
42194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42195pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
42196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42197pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
42198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42199pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
42200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42201pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
42202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42203pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
42204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42205pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
42206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42207pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
42208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42209pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
42210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42211pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
42212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42213pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
42214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42215pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
42216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42217pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
42218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42219pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
42220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42221pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
42222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42223pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
42224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42225pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
42226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42227pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
42228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42229pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
42230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42231pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
42232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42233pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
42234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42235pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
42236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42237pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
42238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42239pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
42240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42241pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
42242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42243pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
42244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42245pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
42246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42247pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
42248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42249pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
42250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42251pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
42252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42253pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
42254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42255pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
42256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42257pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
42258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42259pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
42260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42261pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
42262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42263pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
42264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42265pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
42266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42267pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
42268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42269pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
42270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42271pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
42272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42273pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
42274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42275pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
42276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42277pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
42278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42279pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
42280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42281pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
42282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42283pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
42284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42285pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
42286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42287pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
42288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42289pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
42290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42291pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
42292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42293pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
42294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42295pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
42296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42297pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
42298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42299pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
42300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42301pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
42302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42303pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
42304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42305pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
42306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42307pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
42308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42309pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
42310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42311pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
42312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42313pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
42314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42315pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
42316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42317pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
42318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42319pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
42320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42321pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
42322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42323pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
42324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42325pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
42326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42327pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
42328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42329pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
42330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42331pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
42332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42333pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
42334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42335pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
42336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42337pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
42338#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42339pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
42340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42341pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
42342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42343pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
42344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42345pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
42346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42347pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
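
// Each `_MM_PERM_XYZW` constant above packs four 2-bit dword selectors (A = 0, B = 1,
// C = 2, D = 3) into one byte, with the first letter in the two most-significant bits,
// so e.g. `_MM_PERM_ABCD` = 0b00_01_10_11 = 0x1B. These values serve as the control
// byte for the 32-bit shuffle intrinsics. A hedged sketch (the helper name is
// hypothetical; it assumes the const-generic form of `_mm512_shuffle_epi32`):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn reverse_dwords_per_128bit_lane(a: __m512i) -> __m512i {
//         // With `_MM_PERM_ABCD` the four dwords of every 128-bit lane come out reversed.
//         _mm512_shuffle_epi32::<_MM_PERM_ABCD>(a)
//     }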
42348
42349#[allow(improper_ctypes)]
42350unsafe extern "C" {
42351    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
42352    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
42353    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
42354    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
42355
42356    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
42357    fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
42358    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
42359    fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
42360
42361    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
42362    fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang
42363    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
42364    fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang
42365
42366    #[link_name = "llvm.x86.avx512.add.ps.512"]
42367    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42368    #[link_name = "llvm.x86.avx512.add.pd.512"]
42369    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42370    #[link_name = "llvm.x86.avx512.sub.ps.512"]
42371    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42372    #[link_name = "llvm.x86.avx512.sub.pd.512"]
42373    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42374    #[link_name = "llvm.x86.avx512.mul.ps.512"]
42375    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42376    #[link_name = "llvm.x86.avx512.mul.pd.512"]
42377    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42378    #[link_name = "llvm.x86.avx512.div.ps.512"]
42379    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42380    #[link_name = "llvm.x86.avx512.div.pd.512"]
42381    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42382
42383    #[link_name = "llvm.x86.avx512.max.ps.512"]
42384    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42385    #[link_name = "llvm.x86.avx512.max.pd.512"]
42386    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42387    #[link_name = "llvm.x86.avx512.min.ps.512"]
42388    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42389    #[link_name = "llvm.x86.avx512.min.pd.512"]
42390    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42391
42392    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
42393    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
42394
42395    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
42396    fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42397    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
42398    fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42399
42400    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
42401    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
42402    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
42403    fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42404    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
42405    fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42406
42407    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
42408    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
42409    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
42410    fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
42411    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
42412    fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
42413
42414    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
42415    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
42416    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
42417    fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
42418    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
42419    fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
42420
42421    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
42422    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
42423    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
42424    fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
42425    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
42426    fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
42427
42428    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
42429    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
42430    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
42431    fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
42432    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
42433    fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
42434
42435    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
42436    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42437    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
42438    fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42439    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
42440    fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42441
42442    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
42443    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42444    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
42445    fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42446    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
42447    fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42448
42449    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
42450    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42451    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
42452    fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42453    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
42454    fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42455
42456    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
42457    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42458    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
42459    fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42460    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
42461    fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42462
42463    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
42464    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
42465    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
42466    fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
42467    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
42468    fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
42469
42470    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
42471    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
42472    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
42473    fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
42474    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
42475    fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
42476
42477    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
42478    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
42479    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
42480    fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
42481    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
42482    fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
42483
42484    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
42485    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
42486    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
42487    fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
42488    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
42489    fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
42490
42491    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
42492    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42493    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
42494    fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42495    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
42496    fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42497
42498    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
42499    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42500    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
42501    fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42502    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
42503    fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42504
42505    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
42506    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42507    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
42508    fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42509    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
42510    fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42511
42512    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
42513    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42514    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
42515    fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42516    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
42517    fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42518
42519    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
42520    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42521
42522    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
42523    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42524    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
42525    fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42526    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
42527    fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42528
42529    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
42530    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
42531    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
42532    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
42533
42534    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
42535    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42536
42537    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
42538    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
42539    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
42540    fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
42541    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
42542    fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
42543
42544    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
42545    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
42546    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
42547    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
42548
42549    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
42550    fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
42551    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
42552    fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
42553    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
42554    fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;
42555
42556    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
42557    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
42558
42559    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
42560    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42561    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
42562    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
42563    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
42564    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
42565
42566    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
42567    fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42568    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
42569    fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42570    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
42571    fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42572
42573    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
42574    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42575    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
42576    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
42577    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
42578    fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
42579
42580    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
42581    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
42582    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
42583    fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
42584    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
42585    fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
42586
42587    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
42588    fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42589    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
42590    fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42591    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
42592    fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42593
42594    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
42595    fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42596    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
42597    fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42598    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
42599    fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42600    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
42601    fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42602    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
42603    fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42604
42605    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
42606    fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42607    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
42608    fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42609    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
42610    fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42611
42612    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
42613    fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42614    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
42615    fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42616    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
42617    fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42618
42619    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
42620    fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42621    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
42622    fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42623    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
42624    fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42625
42626    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
42627    fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42628    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
42629    fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42630    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
42631    fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42632
42633    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
42634    fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42635    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
42636    fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42637    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
42638    fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42639
42640    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
42641    fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42642    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
42643    fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42644    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
42645    fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42646
42647    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
42648    fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42649    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
42650    fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42651    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
42652    fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42653
42654    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
42655    fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42656    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
42657    fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42658    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
42659    fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42660
42661    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
42662    fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42663    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
42664    fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42665    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
42666    fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42667
42668    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
42669    fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42670    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
42671    fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42672    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
42673    fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42674
42675    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
42676    fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42677    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
42678    fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42679    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
42680    fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42681
42682    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
42683    fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42684    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
42685    fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42686    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
42687    fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42688
42689    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
42690    fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42691    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
42692    fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42693    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
42694    fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42695
42696    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
42697    fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42698    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
42699    fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42700    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
42701    fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42702
42703    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
42704    fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42705    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
42706    fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42707    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
42708    fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42709
42710    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
42711    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42712
42713    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
42714    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
42715    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
42716    fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
42717    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
42718    fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42719
42720    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
42721    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
42722    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
42723    fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42724    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
42725    fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42726
42727    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
42728    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
42729    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
42730    fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
42731    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
42732    fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42733
42734    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
42735    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
42736    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
42737    fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42738    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
42739    fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42740
42741    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
42742    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42743    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
42744    fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42745    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
42746    fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42747
42748    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
42749    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
42750    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
42751    fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
42752    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
42753    fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
42754
42755    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
42756    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
42757    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
42758    fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
42759    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
42760    fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
42761
42762    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
42763    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
42764    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
42765    fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
42766    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
42767    fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
42768
42769    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
42770    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
42771    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
42772    fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
42773    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
42774    fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
42775
42776    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
42777    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
42778    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
42779    fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
42780    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
42781    fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
42782
42783    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
42784    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
42785    #[link_name = "llvm.x86.avx512.gather.dps.512"]
42786    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
42787    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
42788    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
42789    #[link_name = "llvm.x86.avx512.gather.qps.512"]
42790    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
42791    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
42792    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
42793    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
42794    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
42795    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
42796    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
42797    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
42798    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
42799
42800    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
42801    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
42802    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
42803    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
42804    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
42805    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
42806    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
42807    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
42808    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
42809    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
42810
42811    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
42812    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
42813    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
42814    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
42815    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
42816    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
42817
42818    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
42819    fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
42820    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
42821    fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
42822    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
42823    fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
42824    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
42825    fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
42826    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
42827    fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
42828    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
42829    fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
42830    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
42831    fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
42832    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
42833    fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
42834
42835    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
42836    fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
42837    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
42838    fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
42839    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
42840    fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
42841    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
42842    fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
42843    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
42844    fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
42845    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
42846    fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
42847    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
42848    fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
42849    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
42850    fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
42851
42852    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
42853    fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
42854    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
42855    fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
42856    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
42857    fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
42858    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
42859    fn vgatherdps_128(src: f32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
42860    #[link_name = "llvm.x86.avx512.gather3div4.si"]
42861    fn vpgatherqd_128(src: i32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
42862    #[link_name = "llvm.x86.avx512.gather3div2.di"]
42863    fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
42864    #[link_name = "llvm.x86.avx512.gather3div2.df"]
42865    fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
42866    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
42867    fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
42868
42869    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
42870    fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
42871    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
42872    fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
42873    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
42874    fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
42875    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
42876    fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
42877    #[link_name = "llvm.x86.avx512.gather3div8.si"]
42878    fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
42879    #[link_name = "llvm.x86.avx512.gather3div4.di"]
42880    fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
42881    #[link_name = "llvm.x86.avx512.gather3div4.df"]
42882    fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
42883    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
42884    fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
42885
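    // Comparison bindings: `op` is the comparison-predicate immediate, `m` is the
    // mask applied to the resulting compare mask, and `sae` is the
    // exception-suppression control carried by the scalar and 512-bit forms.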
42886    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
42887    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
42888    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
42889    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
42890
42891    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
42892    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
42893    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
42894    fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
42895    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
42896    fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
42897
42898    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
42899    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
42900    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
42901    fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
42902    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
42903    fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
42904
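    // Rotate bindings: `imm8` is the rotate count; the variable-count forms further
    // below (prolv/prorv) take a per-lane count vector instead.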
42905    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
42906    fn vprold(a: i32x16, imm8: i32) -> i32x16;
42907    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
42908    fn vprold256(a: i32x8, imm8: i32) -> i32x8;
42909    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
42910    fn vprold128(a: i32x4, imm8: i32) -> i32x4;
42911
42912    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
42913    fn vprord(a: i32x16, imm8: i32) -> i32x16;
42914    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
42915    fn vprord256(a: i32x8, imm8: i32) -> i32x8;
42916    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
42917    fn vprord128(a: i32x4, imm8: i32) -> i32x4;
42918
42919    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
42920    fn vprolq(a: i64x8, imm8: i32) -> i64x8;
42921    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
42922    fn vprolq256(a: i64x4, imm8: i32) -> i64x4;
42923    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
42924    fn vprolq128(a: i64x2, imm8: i32) -> i64x2;
42925
42926    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
42927    fn vprorq(a: i64x8, imm8: i32) -> i64x8;
42928    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
42929    fn vprorq256(a: i64x4, imm8: i32) -> i64x4;
42930    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
42931    fn vprorq128(a: i64x2, imm8: i32) -> i64x2;
42932
42933    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
42934    fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
42935    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
42936    fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
42937    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
42938    fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;
42939
42940    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
42941    fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
42942    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
42943    fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
42944    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
42945    fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;
42946
42947    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
42948    fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
42949    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
42950    fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
42951    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
42952    fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;
42953
42954    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
42955    fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
42956    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
42957    fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
42958    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
42959    fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;
42960
42961    #[link_name = "llvm.x86.avx512.psllv.d.512"]
42962    fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
42963    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
42964    fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
42965    #[link_name = "llvm.x86.avx512.psllv.q.512"]
42966    fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
42967    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
42968    fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
42969
42970    #[link_name = "llvm.x86.avx512.psll.d.512"]
42971    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
42972    #[link_name = "llvm.x86.avx512.psrl.d.512"]
42973    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
42974    #[link_name = "llvm.x86.avx512.psll.q.512"]
42975    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
42976    #[link_name = "llvm.x86.avx512.psrl.q.512"]
42977    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
42978
42979    #[link_name = "llvm.x86.avx512.psra.d.512"]
42980    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
42981
42982    #[link_name = "llvm.x86.avx512.psra.q.512"]
42983    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
42984    #[link_name = "llvm.x86.avx512.psra.q.256"]
42985    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
42986    #[link_name = "llvm.x86.avx512.psra.q.128"]
42987    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
42988
42989    #[link_name = "llvm.x86.avx512.psrav.d.512"]
42990    fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
42991
42992    #[link_name = "llvm.x86.avx512.psrav.q.512"]
42993    fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
42994    #[link_name = "llvm.x86.avx512.psrav.q.256"]
42995    fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
42996    #[link_name = "llvm.x86.avx512.psrav.q.128"]
42997    fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
42998
42999    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
43000    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
43001    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
43002    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
43003
43004    #[link_name = "llvm.x86.avx512.permvar.si.512"]
43005    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
43006
43007    #[link_name = "llvm.x86.avx512.permvar.di.512"]
43008    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
43009    #[link_name = "llvm.x86.avx512.permvar.di.256"]
43010    fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
43011
43012    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
43013    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
43014
43015    #[link_name = "llvm.x86.avx512.permvar.df.512"]
43016    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
43017    #[link_name = "llvm.x86.avx512.permvar.df.256"]
43018    fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
43019
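    // Two-source permute bindings (vpermi2*): each element of `idx` selects a lane
    // from the concatenation of `a` and `b`.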
43020    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
43021    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
43022    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
43023    fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
43024    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
43025    fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
43026
43027    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
43028    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
43029    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
43030    fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
43031    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
43032    fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
43033
43034    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
43035    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
43036    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
43037    fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
43038    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
43039    fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
43040
43041    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
43042    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
43043    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
43044    fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
43045    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
43046    fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
43047
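    // Compress bindings: active elements of `a` are packed contiguously toward the
    // low lanes, with the remaining lanes taken from `src`; the `.store` variants
    // below write only the packed active elements to memory.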
43048    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
43049    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
43050    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
43051    fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
43052    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
43053    fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
43054
43055    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
43056    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
43057    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
43058    fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
43059    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
43060    fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43061
43062    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
43063    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43064    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
43065    fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43066    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
43067    fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43068
43069    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
43070    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43071    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
43072    fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43073    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
43074    fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43075
43076    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
43077    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
43078    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
43079    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
43080    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
43081    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
43082
43083    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
43084    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
43085    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
43086    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
43087    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
43088    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
43089
43090    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
43091    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
43092    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
43093    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
43094    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
43095    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
43096
43097    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
43098    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
43099    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
43100    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
43101    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
43102    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
43103
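    // Expand bindings: the inverse of compress; consecutive elements of `a` are
    // placed at the active lane positions, with inactive lanes taken from `src`.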
43104    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
43105    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
43106    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
43107    fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
43108    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
43109    fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
43110
43111    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
43112    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
43113    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
43114    fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
43115    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
43116    fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43117
43118    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
43119    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43120    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
43121    fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43122    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
43123    fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43124
43125    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
43126    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43127    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
43128    fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43129    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
43130    fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43131
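    // Scalar arithmetic with explicit rounding/SAE control: `rounding` and `sae`
    // take the _MM_FROUND_* constants, while `src` and `mask` merge-mask the low
    // element of the result.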
43132    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
43133    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43134    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
43135    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43136    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
43137    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43138    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
43139    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43140    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
43141    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43142    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
43143    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43144    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
43145    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43146    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
43147    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43148    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
43149    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43150    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
43151    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43152    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
43153    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43154    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
43155    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43156    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
43157    fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
43158    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
43159    fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
43160    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
43161    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43162    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
43163    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43164    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
43165    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
43166    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
43167    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
43168
43169    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
43170    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43171    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
43172    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43173    #[link_name = "llvm.x86.avx512.rcp14.ss"]
43174    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43175    #[link_name = "llvm.x86.avx512.rcp14.sd"]
43176    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43177
43178    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
43179    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
43180    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
43181    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
43182    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
43183    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43184    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
43185    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43186
43187    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
43188    fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
43189    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
43190    fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
43191
43192    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
43193    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43194    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
43195    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43196    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
43197    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43198    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
43199    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43200
43201    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
43202    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
43203    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
43204    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43205
43206    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
43207    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
43208    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
43209    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
43210
43211    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
43212    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
43213    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
43214    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
43215
43216    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
43217    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
43218
43219    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
43220    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
43221
43222    #[link_name = "llvm.x86.avx512.cvttss2si"]
43223    fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
43224    #[link_name = "llvm.x86.avx512.cvttss2usi"]
43225    fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
43226
43227    #[link_name = "llvm.x86.avx512.cvttsd2si"]
43228    fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
43229    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
43230    fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
43231
43232    #[link_name = "llvm.x86.avx512.vcomi.ss"]
43233    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
43234    #[link_name = "llvm.x86.avx512.vcomi.sd"]
43235    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
43236
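    // Masked load/store bindings: the `loadu`/`storeu` forms accept unaligned
    // addresses, the `load`/`store` forms require natural alignment, and `a`
    // supplies the pass-through value for masked-off lanes on loads.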
43237    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
43238    fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43239    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
43240    fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43241    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
43242    fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43243    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
43244    fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43245    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
43246    fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43247    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
43248    fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43249    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
43250    fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43251    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
43252    fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43253    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
43254    fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43255    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
43256    fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43257    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
43258    fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43259    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
43260    fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43261
43262    #[link_name = "llvm.x86.avx512.mask.load.d.128"]
43263    fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43264    #[link_name = "llvm.x86.avx512.mask.load.q.128"]
43265    fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43266    #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
43267    fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43268    #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
43269    fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43270    #[link_name = "llvm.x86.avx512.mask.load.d.256"]
43271    fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43272    #[link_name = "llvm.x86.avx512.mask.load.q.256"]
43273    fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43274    #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
43275    fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43276    #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
43277    fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43278    #[link_name = "llvm.x86.avx512.mask.load.d.512"]
43279    fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43280    #[link_name = "llvm.x86.avx512.mask.load.q.512"]
43281    fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43282    #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
43283    fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43284    #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
43285    fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43286
43287    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
43288    fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43289    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
43290    fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43291    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
43292    fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43293    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
43294    fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43295    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
43296    fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43297    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
43298    fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43299    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
43300    fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43301    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
43302    fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43303    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
43304    fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43305    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
43306    fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43307    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
43308    fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43309    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
43310    fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43311
43312    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
43313    fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43314    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
43315    fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43316    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
43317    fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43318    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
43319    fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43320    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
43321    fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43322    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
43323    fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43324    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
43325    fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43326    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
43327    fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43328    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
43329    fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43330    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
43331    fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43332    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
43333    fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43334    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
43335    fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43336
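    // Expand-load bindings: read only as many contiguous elements from memory as
    // there are set mask bits and place them at the active lane positions.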
43337    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
43338    fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43339    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
43340    fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43341    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
43342    fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43343    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
43344    fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43345    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
43346    fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43347    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
43348    fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43349    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
43350    fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43351    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
43352    fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43353    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
43354    fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43355    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
43356    fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43357    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
43358    fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43359    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
43360    fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43361
43362}
43363
43364#[cfg(test)]
43365mod tests {
43366
43367    use stdarch_test::simd_test;
43368
43369    use crate::core_arch::x86::*;
43370    use crate::hint::black_box;
43371    use crate::mem;
43372
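    // Convention used throughout these tests: each masked variant is first called
    // with an all-zero mask (the result must equal `src`, or zero for the maskz
    // forms) and then with a partial mask covering only the low lanes.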
43373    #[simd_test(enable = "avx512f")]
43374    unsafe fn test_mm512_abs_epi32() {
43375        #[rustfmt::skip]
43376        let a = _mm512_setr_epi32(
43377            0, 1, -1, i32::MAX,
43378            i32::MIN, 100, -100, -32,
43379            0, 1, -1, i32::MAX,
43380            i32::MIN, 100, -100, -32,
43381        );
43382        let r = _mm512_abs_epi32(a);
43383        #[rustfmt::skip]
43384        let e = _mm512_setr_epi32(
43385            0, 1, 1, i32::MAX,
43386            i32::MAX.wrapping_add(1), 100, 100, 32,
43387            0, 1, 1, i32::MAX,
43388            i32::MAX.wrapping_add(1), 100, 100, 32,
43389        );
43390        assert_eq_m512i(r, e);
43391    }
43392
43393    #[simd_test(enable = "avx512f")]
43394    unsafe fn test_mm512_mask_abs_epi32() {
43395        #[rustfmt::skip]
43396        let a = _mm512_setr_epi32(
43397            0, 1, -1, i32::MAX,
43398            i32::MIN, 100, -100, -32,
43399            0, 1, -1, i32::MAX,
43400            i32::MIN, 100, -100, -32,
43401        );
43402        let r = _mm512_mask_abs_epi32(a, 0, a);
43403        assert_eq_m512i(r, a);
43404        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43405        #[rustfmt::skip]
43406        let e = _mm512_setr_epi32(
43407            0, 1, 1, i32::MAX,
43408            i32::MAX.wrapping_add(1), 100, 100, 32,
43409            0, 1, -1, i32::MAX,
43410            i32::MIN, 100, -100, -32,
43411        );
43412        assert_eq_m512i(r, e);
43413    }
43414
43415    #[simd_test(enable = "avx512f")]
43416    unsafe fn test_mm512_maskz_abs_epi32() {
43417        #[rustfmt::skip]
43418        let a = _mm512_setr_epi32(
43419            0, 1, -1, i32::MAX,
43420            i32::MIN, 100, -100, -32,
43421            0, 1, -1, i32::MAX,
43422            i32::MIN, 100, -100, -32,
43423        );
43424        let r = _mm512_maskz_abs_epi32(0, a);
43425        assert_eq_m512i(r, _mm512_setzero_si512());
43426        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43427        #[rustfmt::skip]
43428        let e = _mm512_setr_epi32(
43429            0, 1, 1, i32::MAX,
43430            i32::MAX.wrapping_add(1), 100, 100, 32,
43431            0, 0, 0, 0,
43432            0, 0, 0, 0,
43433        );
43434        assert_eq_m512i(r, e);
43435    }
43436
43437    #[simd_test(enable = "avx512f,avx512vl")]
43438    unsafe fn test_mm256_mask_abs_epi32() {
43439        #[rustfmt::skip]
43440        let a = _mm256_setr_epi32(
43441            0, 1, -1, i32::MAX,
43442            i32::MIN, 100, -100, -32,
43443        );
43444        let r = _mm256_mask_abs_epi32(a, 0, a);
43445        assert_eq_m256i(r, a);
43446        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43447        #[rustfmt::skip]
43448        let e = _mm256_setr_epi32(
43449            0, 1, 1, i32::MAX,
43450            i32::MAX.wrapping_add(1), 100, -100, -32,
43451        );
43452        assert_eq_m256i(r, e);
43453    }
43454
43455    #[simd_test(enable = "avx512f,avx512vl")]
43456    unsafe fn test_mm256_maskz_abs_epi32() {
43457        #[rustfmt::skip]
43458        let a = _mm256_setr_epi32(
43459            0, 1, -1, i32::MAX,
43460            i32::MIN, 100, -100, -32,
43461        );
43462        let r = _mm256_maskz_abs_epi32(0, a);
43463        assert_eq_m256i(r, _mm256_setzero_si256());
43464        let r = _mm256_maskz_abs_epi32(0b00001111, a);
43465        #[rustfmt::skip]
43466        let e = _mm256_setr_epi32(
43467            0, 1, 1, i32::MAX,
43468            0, 0, 0, 0,
43469        );
43470        assert_eq_m256i(r, e);
43471    }
43472
43473    #[simd_test(enable = "avx512f,avx512vl")]
43474    unsafe fn test_mm_mask_abs_epi32() {
43475        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43476        let r = _mm_mask_abs_epi32(a, 0, a);
43477        assert_eq_m128i(r, a);
43478        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43479        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43480        assert_eq_m128i(r, e);
43481    }
43482
43483    #[simd_test(enable = "avx512f,avx512vl")]
43484    unsafe fn test_mm_maskz_abs_epi32() {
43485        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43486        let r = _mm_maskz_abs_epi32(0, a);
43487        assert_eq_m128i(r, _mm_setzero_si128());
43488        let r = _mm_maskz_abs_epi32(0b00001111, a);
43489        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43490        assert_eq_m128i(r, e);
43491    }
43492
43493    #[simd_test(enable = "avx512f")]
43494    unsafe fn test_mm512_abs_ps() {
43495        #[rustfmt::skip]
43496        let a = _mm512_setr_ps(
43497            0., 1., -1., f32::MAX,
43498            f32::MIN, 100., -100., -32.,
43499            0., 1., -1., f32::MAX,
43500            f32::MIN, 100., -100., -32.,
43501        );
43502        let r = _mm512_abs_ps(a);
43503        #[rustfmt::skip]
43504        let e = _mm512_setr_ps(
43505            0., 1., 1., f32::MAX,
43506            f32::MAX, 100., 100., 32.,
43507            0., 1., 1., f32::MAX,
43508            f32::MAX, 100., 100., 32.,
43509        );
43510        assert_eq_m512(r, e);
43511    }
43512
43513    #[simd_test(enable = "avx512f")]
43514    unsafe fn test_mm512_mask_abs_ps() {
43515        #[rustfmt::skip]
43516        let a = _mm512_setr_ps(
43517            0., 1., -1., f32::MAX,
43518            f32::MIN, 100., -100., -32.,
43519            0., 1., -1., f32::MAX,
43520            f32::MIN, 100., -100., -32.,
43521        );
43522        let r = _mm512_mask_abs_ps(a, 0, a);
43523        assert_eq_m512(r, a);
43524        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43525        #[rustfmt::skip]
43526        let e = _mm512_setr_ps(
43527            0., 1., 1., f32::MAX,
43528            f32::MAX, 100., 100., 32.,
43529            0., 1., -1., f32::MAX,
43530            f32::MIN, 100., -100., -32.,
43531        );
43532        assert_eq_m512(r, e);
43533    }
43534
43535    #[simd_test(enable = "avx512f")]
43536    unsafe fn test_mm512_mask_mov_epi32() {
43537        let src = _mm512_set1_epi32(1);
43538        let a = _mm512_set1_epi32(2);
43539        let r = _mm512_mask_mov_epi32(src, 0, a);
43540        assert_eq_m512i(r, src);
43541        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43542        assert_eq_m512i(r, a);
43543    }
43544
43545    #[simd_test(enable = "avx512f")]
43546    unsafe fn test_mm512_maskz_mov_epi32() {
43547        let a = _mm512_set1_epi32(2);
43548        let r = _mm512_maskz_mov_epi32(0, a);
43549        assert_eq_m512i(r, _mm512_setzero_si512());
43550        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43551        assert_eq_m512i(r, a);
43552    }
43553
43554    #[simd_test(enable = "avx512f,avx512vl")]
43555    unsafe fn test_mm256_mask_mov_epi32() {
43556        let src = _mm256_set1_epi32(1);
43557        let a = _mm256_set1_epi32(2);
43558        let r = _mm256_mask_mov_epi32(src, 0, a);
43559        assert_eq_m256i(r, src);
43560        let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43561        assert_eq_m256i(r, a);
43562    }
43563
43564    #[simd_test(enable = "avx512f,avx512vl")]
43565    unsafe fn test_mm256_maskz_mov_epi32() {
43566        let a = _mm256_set1_epi32(2);
43567        let r = _mm256_maskz_mov_epi32(0, a);
43568        assert_eq_m256i(r, _mm256_setzero_si256());
43569        let r = _mm256_maskz_mov_epi32(0b11111111, a);
43570        assert_eq_m256i(r, a);
43571    }
43572
43573    #[simd_test(enable = "avx512f,avx512vl")]
43574    unsafe fn test_mm_mask_mov_epi32() {
43575        let src = _mm_set1_epi32(1);
43576        let a = _mm_set1_epi32(2);
43577        let r = _mm_mask_mov_epi32(src, 0, a);
43578        assert_eq_m128i(r, src);
43579        let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43580        assert_eq_m128i(r, a);
43581    }
43582
43583    #[simd_test(enable = "avx512f,avx512vl")]
43584    unsafe fn test_mm_maskz_mov_epi32() {
43585        let a = _mm_set1_epi32(2);
43586        let r = _mm_maskz_mov_epi32(0, a);
43587        assert_eq_m128i(r, _mm_setzero_si128());
43588        let r = _mm_maskz_mov_epi32(0b00001111, a);
43589        assert_eq_m128i(r, a);
43590    }
43591
43592    #[simd_test(enable = "avx512f")]
43593    unsafe fn test_mm512_mask_mov_ps() {
43594        let src = _mm512_set1_ps(1.);
43595        let a = _mm512_set1_ps(2.);
43596        let r = _mm512_mask_mov_ps(src, 0, a);
43597        assert_eq_m512(r, src);
43598        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43599        assert_eq_m512(r, a);
43600    }
43601
43602    #[simd_test(enable = "avx512f")]
43603    unsafe fn test_mm512_maskz_mov_ps() {
43604        let a = _mm512_set1_ps(2.);
43605        let r = _mm512_maskz_mov_ps(0, a);
43606        assert_eq_m512(r, _mm512_setzero_ps());
43607        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43608        assert_eq_m512(r, a);
43609    }
43610
43611    #[simd_test(enable = "avx512f,avx512vl")]
43612    unsafe fn test_mm256_mask_mov_ps() {
43613        let src = _mm256_set1_ps(1.);
43614        let a = _mm256_set1_ps(2.);
43615        let r = _mm256_mask_mov_ps(src, 0, a);
43616        assert_eq_m256(r, src);
43617        let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43618        assert_eq_m256(r, a);
43619    }
43620
43621    #[simd_test(enable = "avx512f,avx512vl")]
43622    unsafe fn test_mm256_maskz_mov_ps() {
43623        let a = _mm256_set1_ps(2.);
43624        let r = _mm256_maskz_mov_ps(0, a);
43625        assert_eq_m256(r, _mm256_setzero_ps());
43626        let r = _mm256_maskz_mov_ps(0b11111111, a);
43627        assert_eq_m256(r, a);
43628    }
43629
43630    #[simd_test(enable = "avx512f,avx512vl")]
43631    unsafe fn test_mm_mask_mov_ps() {
43632        let src = _mm_set1_ps(1.);
43633        let a = _mm_set1_ps(2.);
43634        let r = _mm_mask_mov_ps(src, 0, a);
43635        assert_eq_m128(r, src);
43636        let r = _mm_mask_mov_ps(src, 0b00001111, a);
43637        assert_eq_m128(r, a);
43638    }
43639
43640    #[simd_test(enable = "avx512f,avx512vl")]
43641    unsafe fn test_mm_maskz_mov_ps() {
43642        let a = _mm_set1_ps(2.);
43643        let r = _mm_maskz_mov_ps(0, a);
43644        assert_eq_m128(r, _mm_setzero_ps());
43645        let r = _mm_maskz_mov_ps(0b00001111, a);
43646        assert_eq_m128(r, a);
43647    }
43648
43649    #[simd_test(enable = "avx512f")]
43650    unsafe fn test_mm512_add_epi32() {
43651        #[rustfmt::skip]
43652        let a = _mm512_setr_epi32(
43653            0, 1, -1, i32::MAX,
43654            i32::MIN, 100, -100, -32,
43655            0, 1, -1, i32::MAX,
43656            i32::MIN, 100, -100, -32,
43657        );
43658        let b = _mm512_set1_epi32(1);
43659        let r = _mm512_add_epi32(a, b);
43660        #[rustfmt::skip]
43661        let e = _mm512_setr_epi32(
43662            1, 2, 0, i32::MIN,
43663            i32::MIN + 1, 101, -99, -31,
43664            1, 2, 0, i32::MIN,
43665            i32::MIN + 1, 101, -99, -31,
43666        );
43667        assert_eq_m512i(r, e);
43668    }
43669
43670    #[simd_test(enable = "avx512f")]
43671    unsafe fn test_mm512_mask_add_epi32() {
43672        #[rustfmt::skip]
43673        let a = _mm512_setr_epi32(
43674            0, 1, -1, i32::MAX,
43675            i32::MIN, 100, -100, -32,
43676            0, 1, -1, i32::MAX,
43677            i32::MIN, 100, -100, -32,
43678        );
43679        let b = _mm512_set1_epi32(1);
43680        let r = _mm512_mask_add_epi32(a, 0, a, b);
43681        assert_eq_m512i(r, a);
43682        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
43683        #[rustfmt::skip]
43684        let e = _mm512_setr_epi32(
43685            1, 2, 0, i32::MIN,
43686            i32::MIN + 1, 101, -99, -31,
43687            0, 1, -1, i32::MAX,
43688            i32::MIN, 100, -100, -32,
43689        );
43690        assert_eq_m512i(r, e);
43691    }
43692
43693    #[simd_test(enable = "avx512f")]
43694    unsafe fn test_mm512_maskz_add_epi32() {
43695        #[rustfmt::skip]
43696        let a = _mm512_setr_epi32(
43697            0, 1, -1, i32::MAX,
43698            i32::MIN, 100, -100, -32,
43699            0, 1, -1, i32::MAX,
43700            i32::MIN, 100, -100, -32,
43701        );
43702        let b = _mm512_set1_epi32(1);
43703        let r = _mm512_maskz_add_epi32(0, a, b);
43704        assert_eq_m512i(r, _mm512_setzero_si512());
43705        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
43706        #[rustfmt::skip]
43707        let e = _mm512_setr_epi32(
43708            1, 2, 0, i32::MIN,
43709            i32::MIN + 1, 101, -99, -31,
43710            0, 0, 0, 0,
43711            0, 0, 0, 0,
43712        );
43713        assert_eq_m512i(r, e);
43714    }
43715
43716    #[simd_test(enable = "avx512f,avx512vl")]
43717    unsafe fn test_mm256_mask_add_epi32() {
43718        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43719        let b = _mm256_set1_epi32(1);
43720        let r = _mm256_mask_add_epi32(a, 0, a, b);
43721        assert_eq_m256i(r, a);
43722        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
43723        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43724        assert_eq_m256i(r, e);
43725    }
43726
43727    #[simd_test(enable = "avx512f,avx512vl")]
43728    unsafe fn test_mm256_maskz_add_epi32() {
43729        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43730        let b = _mm256_set1_epi32(1);
43731        let r = _mm256_maskz_add_epi32(0, a, b);
43732        assert_eq_m256i(r, _mm256_setzero_si256());
43733        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
43734        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43735        assert_eq_m256i(r, e);
43736    }
43737
43738    #[simd_test(enable = "avx512f,avx512vl")]
43739    unsafe fn test_mm_mask_add_epi32() {
43740        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43741        let b = _mm_set1_epi32(1);
43742        let r = _mm_mask_add_epi32(a, 0, a, b);
43743        assert_eq_m128i(r, a);
43744        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
43745        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
43746        assert_eq_m128i(r, e);
43747    }
43748
43749    #[simd_test(enable = "avx512f,avx512vl")]
43750    unsafe fn test_mm_maskz_add_epi32() {
43751        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
43752        let b = _mm_set1_epi32(1);
43753        let r = _mm_maskz_add_epi32(0, a, b);
43754        assert_eq_m128i(r, _mm_setzero_si128());
43755        let r = _mm_maskz_add_epi32(0b00001111, a, b);
43756        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
43757        assert_eq_m128i(r, e);
43758    }
43759
43760    #[simd_test(enable = "avx512f")]
43761    unsafe fn test_mm512_add_ps() {
43762        #[rustfmt::skip]
43763        let a = _mm512_setr_ps(
43764            0., 1., -1., f32::MAX,
43765            f32::MIN, 100., -100., -32.,
43766            0., 1., -1., f32::MAX,
43767            f32::MIN, 100., -100., -32.,
43768        );
43769        let b = _mm512_set1_ps(1.);
43770        let r = _mm512_add_ps(a, b);
43771        #[rustfmt::skip]
43772        let e = _mm512_setr_ps(
43773            1., 2., 0., f32::MAX,
43774            f32::MIN + 1., 101., -99., -31.,
43775            1., 2., 0., f32::MAX,
43776            f32::MIN + 1., 101., -99., -31.,
43777        );
43778        assert_eq_m512(r, e);
43779    }
43780
43781    #[simd_test(enable = "avx512f")]
43782    unsafe fn test_mm512_mask_add_ps() {
43783        #[rustfmt::skip]
43784        let a = _mm512_setr_ps(
43785            0., 1., -1., f32::MAX,
43786            f32::MIN, 100., -100., -32.,
43787            0., 1., -1., f32::MAX,
43788            f32::MIN, 100., -100., -32.,
43789        );
43790        let b = _mm512_set1_ps(1.);
43791        let r = _mm512_mask_add_ps(a, 0, a, b);
43792        assert_eq_m512(r, a);
43793        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
43794        #[rustfmt::skip]
43795        let e = _mm512_setr_ps(
43796            1., 2., 0., f32::MAX,
43797            f32::MIN + 1., 101., -99., -31.,
43798            0., 1., -1., f32::MAX,
43799            f32::MIN, 100., -100., -32.,
43800        );
43801        assert_eq_m512(r, e);
43802    }
43803
43804    #[simd_test(enable = "avx512f")]
43805    unsafe fn test_mm512_maskz_add_ps() {
43806        #[rustfmt::skip]
43807        let a = _mm512_setr_ps(
43808            0., 1., -1., f32::MAX,
43809            f32::MIN, 100., -100., -32.,
43810            0., 1., -1., f32::MAX,
43811            f32::MIN, 100., -100., -32.,
43812        );
43813        let b = _mm512_set1_ps(1.);
43814        let r = _mm512_maskz_add_ps(0, a, b);
43815        assert_eq_m512(r, _mm512_setzero_ps());
43816        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
43817        #[rustfmt::skip]
43818        let e = _mm512_setr_ps(
43819            1., 2., 0., f32::MAX,
43820            f32::MIN + 1., 101., -99., -31.,
43821            0., 0., 0., 0.,
43822            0., 0., 0., 0.,
43823        );
43824        assert_eq_m512(r, e);
43825    }
43826
43827    #[simd_test(enable = "avx512f,avx512vl")]
43828    unsafe fn test_mm256_mask_add_ps() {
43829        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43830        let b = _mm256_set1_ps(1.);
43831        let r = _mm256_mask_add_ps(a, 0, a, b);
43832        assert_eq_m256(r, a);
43833        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
43834        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43835        assert_eq_m256(r, e);
43836    }
43837
43838    #[simd_test(enable = "avx512f,avx512vl")]
43839    unsafe fn test_mm256_maskz_add_ps() {
43840        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43841        let b = _mm256_set1_ps(1.);
43842        let r = _mm256_maskz_add_ps(0, a, b);
43843        assert_eq_m256(r, _mm256_setzero_ps());
43844        let r = _mm256_maskz_add_ps(0b11111111, a, b);
43845        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43846        assert_eq_m256(r, e);
43847    }
43848
43849    #[simd_test(enable = "avx512f,avx512vl")]
43850    unsafe fn test_mm_mask_add_ps() {
43851        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43852        let b = _mm_set1_ps(1.);
43853        let r = _mm_mask_add_ps(a, 0, a, b);
43854        assert_eq_m128(r, a);
43855        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
43856        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43857        assert_eq_m128(r, e);
43858    }
43859
43860    #[simd_test(enable = "avx512f,avx512vl")]
43861    unsafe fn test_mm_maskz_add_ps() {
43862        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43863        let b = _mm_set1_ps(1.);
43864        let r = _mm_maskz_add_ps(0, a, b);
43865        assert_eq_m128(r, _mm_setzero_ps());
43866        let r = _mm_maskz_add_ps(0b00001111, a, b);
43867        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43868        assert_eq_m128(r, e);
43869    }
43870
43871    #[simd_test(enable = "avx512f")]
43872    unsafe fn test_mm512_sub_epi32() {
43873        #[rustfmt::skip]
43874        let a = _mm512_setr_epi32(
43875            0, 1, -1, i32::MAX,
43876            i32::MIN, 100, -100, -32,
43877            0, 1, -1, i32::MAX,
43878            i32::MIN, 100, -100, -32,
43879        );
43880        let b = _mm512_set1_epi32(1);
43881        let r = _mm512_sub_epi32(a, b);
43882        #[rustfmt::skip]
43883        let e = _mm512_setr_epi32(
43884            -1, 0, -2, i32::MAX - 1,
43885            i32::MAX, 99, -101, -33,
43886            -1, 0, -2, i32::MAX - 1,
43887            i32::MAX, 99, -101, -33,
43888        );
43889        assert_eq_m512i(r, e);
43890    }
43891
43892    #[simd_test(enable = "avx512f")]
43893    unsafe fn test_mm512_mask_sub_epi32() {
43894        #[rustfmt::skip]
43895        let a = _mm512_setr_epi32(
43896            0, 1, -1, i32::MAX,
43897            i32::MIN, 100, -100, -32,
43898            0, 1, -1, i32::MAX,
43899            i32::MIN, 100, -100, -32,
43900        );
43901        let b = _mm512_set1_epi32(1);
43902        let r = _mm512_mask_sub_epi32(a, 0, a, b);
43903        assert_eq_m512i(r, a);
43904        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
43905        #[rustfmt::skip]
43906        let e = _mm512_setr_epi32(
43907            -1, 0, -2, i32::MAX - 1,
43908            i32::MAX, 99, -101, -33,
43909            0, 1, -1, i32::MAX,
43910            i32::MIN, 100, -100, -32,
43911        );
43912        assert_eq_m512i(r, e);
43913    }
43914
43915    #[simd_test(enable = "avx512f")]
43916    unsafe fn test_mm512_maskz_sub_epi32() {
43917        #[rustfmt::skip]
43918        let a = _mm512_setr_epi32(
43919            0, 1, -1, i32::MAX,
43920            i32::MIN, 100, -100, -32,
43921            0, 1, -1, i32::MAX,
43922            i32::MIN, 100, -100, -32,
43923        );
43924        let b = _mm512_set1_epi32(1);
43925        let r = _mm512_maskz_sub_epi32(0, a, b);
43926        assert_eq_m512i(r, _mm512_setzero_si512());
43927        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
43928        #[rustfmt::skip]
43929        let e = _mm512_setr_epi32(
43930            -1, 0, -2, i32::MAX - 1,
43931            i32::MAX, 99, -101, -33,
43932            0, 0, 0, 0,
43933            0, 0, 0, 0,
43934        );
43935        assert_eq_m512i(r, e);
43936    }
43937
43938    #[simd_test(enable = "avx512f,avx512vl")]
43939    unsafe fn test_mm256_mask_sub_epi32() {
43940        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43941        let b = _mm256_set1_epi32(1);
43942        let r = _mm256_mask_sub_epi32(a, 0, a, b);
43943        assert_eq_m256i(r, a);
43944        let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
43945        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43946        assert_eq_m256i(r, e);
43947    }
43948
43949    #[simd_test(enable = "avx512f,avx512vl")]
43950    unsafe fn test_mm256_maskz_sub_epi32() {
43951        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43952        let b = _mm256_set1_epi32(1);
43953        let r = _mm256_maskz_sub_epi32(0, a, b);
43954        assert_eq_m256i(r, _mm256_setzero_si256());
43955        let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
43956        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43957        assert_eq_m256i(r, e);
43958    }
43959
43960    #[simd_test(enable = "avx512f,avx512vl")]
43961    unsafe fn test_mm_mask_sub_epi32() {
43962        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43963        let b = _mm_set1_epi32(1);
43964        let r = _mm_mask_sub_epi32(a, 0, a, b);
43965        assert_eq_m128i(r, a);
43966        let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
43967        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43968        assert_eq_m128i(r, e);
43969    }
43970
43971    #[simd_test(enable = "avx512f,avx512vl")]
43972    unsafe fn test_mm_maskz_sub_epi32() {
43973        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43974        let b = _mm_set1_epi32(1);
43975        let r = _mm_maskz_sub_epi32(0, a, b);
43976        assert_eq_m128i(r, _mm_setzero_si128());
43977        let r = _mm_maskz_sub_epi32(0b00001111, a, b);
43978        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43979        assert_eq_m128i(r, e);
43980    }
43981
43982    #[simd_test(enable = "avx512f")]
43983    unsafe fn test_mm512_sub_ps() {
43984        #[rustfmt::skip]
43985        let a = _mm512_setr_ps(
43986            0., 1., -1., f32::MAX,
43987            f32::MIN, 100., -100., -32.,
43988            0., 1., -1., f32::MAX,
43989            f32::MIN, 100., -100., -32.,
43990        );
43991        let b = _mm512_set1_ps(1.);
43992        let r = _mm512_sub_ps(a, b);
43993        #[rustfmt::skip]
43994        let e = _mm512_setr_ps(
43995            -1., 0., -2., f32::MAX - 1.,
43996            f32::MIN, 99., -101., -33.,
43997            -1., 0., -2., f32::MAX - 1.,
43998            f32::MIN, 99., -101., -33.,
43999        );
44000        assert_eq_m512(r, e);
44001    }
44002
44003    #[simd_test(enable = "avx512f")]
44004    unsafe fn test_mm512_mask_sub_ps() {
44005        #[rustfmt::skip]
44006        let a = _mm512_setr_ps(
44007            0., 1., -1., f32::MAX,
44008            f32::MIN, 100., -100., -32.,
44009            0., 1., -1., f32::MAX,
44010            f32::MIN, 100., -100., -32.,
44011        );
44012        let b = _mm512_set1_ps(1.);
44013        let r = _mm512_mask_sub_ps(a, 0, a, b);
44014        assert_eq_m512(r, a);
44015        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
44016        #[rustfmt::skip]
44017        let e = _mm512_setr_ps(
44018            -1., 0., -2., f32::MAX - 1.,
44019            f32::MIN, 99., -101., -33.,
44020            0., 1., -1., f32::MAX,
44021            f32::MIN, 100., -100., -32.,
44022        );
44023        assert_eq_m512(r, e);
44024    }
44025
44026    #[simd_test(enable = "avx512f")]
44027    unsafe fn test_mm512_maskz_sub_ps() {
44028        #[rustfmt::skip]
44029        let a = _mm512_setr_ps(
44030            0., 1., -1., f32::MAX,
44031            f32::MIN, 100., -100., -32.,
44032            0., 1., -1., f32::MAX,
44033            f32::MIN, 100., -100., -32.,
44034        );
44035        let b = _mm512_set1_ps(1.);
44036        let r = _mm512_maskz_sub_ps(0, a, b);
44037        assert_eq_m512(r, _mm512_setzero_ps());
44038        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
44039        #[rustfmt::skip]
44040        let e = _mm512_setr_ps(
44041            -1., 0., -2., f32::MAX - 1.,
44042            f32::MIN, 99., -101., -33.,
44043            0., 0., 0., 0.,
44044            0., 0., 0., 0.,
44045        );
44046        assert_eq_m512(r, e);
44047    }
44048
44049    #[simd_test(enable = "avx512f,avx512vl")]
44050    unsafe fn test_mm256_mask_sub_ps() {
44051        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44052        let b = _mm256_set1_ps(1.);
44053        let r = _mm256_mask_sub_ps(a, 0, a, b);
44054        assert_eq_m256(r, a);
44055        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
44056        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
44057        assert_eq_m256(r, e);
44058    }
44059
44060    #[simd_test(enable = "avx512f,avx512vl")]
44061    unsafe fn test_mm256_maskz_sub_ps() {
44062        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44063        let b = _mm256_set1_ps(1.);
44064        let r = _mm256_maskz_sub_ps(0, a, b);
44065        assert_eq_m256(r, _mm256_setzero_ps());
44066        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
44067        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
44068        assert_eq_m256(r, e);
44069    }
44070
44071    #[simd_test(enable = "avx512f,avx512vl")]
44072    unsafe fn test_mm_mask_sub_ps() {
44073        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44074        let b = _mm_set1_ps(1.);
44075        let r = _mm_mask_sub_ps(a, 0, a, b);
44076        assert_eq_m128(r, a);
44077        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
44078        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44079        assert_eq_m128(r, e);
44080    }
44081
44082    #[simd_test(enable = "avx512f,avx512vl")]
44083    unsafe fn test_mm_maskz_sub_ps() {
44084        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44085        let b = _mm_set1_ps(1.);
44086        let r = _mm_maskz_sub_ps(0, a, b);
44087        assert_eq_m128(r, _mm_setzero_ps());
44088        let r = _mm_maskz_sub_ps(0b00001111, a, b);
44089        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44090        assert_eq_m128(r, e);
44091    }
44092
44093    #[simd_test(enable = "avx512f")]
44094    unsafe fn test_mm512_mullo_epi32() {
44095        #[rustfmt::skip]
44096        let a = _mm512_setr_epi32(
44097            0, 1, -1, i32::MAX,
44098            i32::MIN, 100, -100, -32,
44099            0, 1, -1, i32::MAX,
44100            i32::MIN, 100, -100, -32,
44101        );
44102        let b = _mm512_set1_epi32(2);
44103        let r = _mm512_mullo_epi32(a, b);
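        // vpmulld keeps only the low 32 bits of each product, so i32::MAX * 2 wraps
        // to -2 and i32::MIN * 2 wraps to 0.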
44104        let e = _mm512_setr_epi32(
44105            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
44106        );
44107        assert_eq_m512i(r, e);
44108    }
44109
44110    #[simd_test(enable = "avx512f")]
44111    unsafe fn test_mm512_mask_mullo_epi32() {
44112        #[rustfmt::skip]
44113        let a = _mm512_setr_epi32(
44114            0, 1, -1, i32::MAX,
44115            i32::MIN, 100, -100, -32,
44116            0, 1, -1, i32::MAX,
44117            i32::MIN, 100, -100, -32,
44118        );
44119        let b = _mm512_set1_epi32(2);
44120        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
44121        assert_eq_m512i(r, a);
44122        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
44123        #[rustfmt::skip]
44124        let e = _mm512_setr_epi32(
44125            0, 2, -2, -2,
44126            0, 200, -200, -64,
44127            0, 1, -1, i32::MAX,
44128            i32::MIN, 100, -100, -32,
44129        );
44130        assert_eq_m512i(r, e);
44131    }
44132
44133    #[simd_test(enable = "avx512f")]
44134    unsafe fn test_mm512_maskz_mullo_epi32() {
44135        #[rustfmt::skip]
44136        let a = _mm512_setr_epi32(
44137            0, 1, -1, i32::MAX,
44138            i32::MIN, 100, -100, -32,
44139            0, 1, -1, i32::MAX,
44140            i32::MIN, 100, -100, -32,
44141        );
44142        let b = _mm512_set1_epi32(2);
44143        let r = _mm512_maskz_mullo_epi32(0, a, b);
44144        assert_eq_m512i(r, _mm512_setzero_si512());
44145        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
44146        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
44147        assert_eq_m512i(r, e);
44148    }
44149
44150    #[simd_test(enable = "avx512f,avx512vl")]
44151    unsafe fn test_mm256_mask_mullo_epi32() {
44152        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44153        let b = _mm256_set1_epi32(2);
44154        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
44155        assert_eq_m256i(r, a);
44156        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
44157        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44158        assert_eq_m256i(r, e);
44159    }
44160
44161    #[simd_test(enable = "avx512f,avx512vl")]
44162    unsafe fn test_mm256_maskz_mullo_epi32() {
44163        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44164        let b = _mm256_set1_epi32(2);
44165        let r = _mm256_maskz_mullo_epi32(0, a, b);
44166        assert_eq_m256i(r, _mm256_setzero_si256());
44167        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
44168        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44169        assert_eq_m256i(r, e);
44170    }
44171
44172    #[simd_test(enable = "avx512f,avx512vl")]
44173    unsafe fn test_mm_mask_mullo_epi32() {
44174        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44175        let b = _mm_set1_epi32(2);
44176        let r = _mm_mask_mullo_epi32(a, 0, a, b);
44177        assert_eq_m128i(r, a);
44178        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
44179        let e = _mm_set_epi32(2, -2, -2, 0);
44180        assert_eq_m128i(r, e);
44181    }
44182
44183    #[simd_test(enable = "avx512f,avx512vl")]
44184    unsafe fn test_mm_maskz_mullo_epi32() {
44185        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44186        let b = _mm_set1_epi32(2);
44187        let r = _mm_maskz_mullo_epi32(0, a, b);
44188        assert_eq_m128i(r, _mm_setzero_si128());
44189        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
44190        let e = _mm_set_epi32(2, -2, -2, 0);
44191        assert_eq_m128i(r, e);
44192    }
44193
44194    #[simd_test(enable = "avx512f")]
44195    unsafe fn test_mm512_mul_ps() {
44196        #[rustfmt::skip]
44197        let a = _mm512_setr_ps(
44198            0., 1., -1., f32::MAX,
44199            f32::MIN, 100., -100., -32.,
44200            0., 1., -1., f32::MAX,
44201            f32::MIN, 100., -100., -32.,
44202        );
44203        let b = _mm512_set1_ps(2.);
44204        let r = _mm512_mul_ps(a, b);
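        // Doubling f32::MAX overflows to infinity and f32::MIN to negative infinity.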
44205        #[rustfmt::skip]
44206        let e = _mm512_setr_ps(
44207            0., 2., -2., f32::INFINITY,
44208            f32::NEG_INFINITY, 200., -200., -64.,
44209            0., 2., -2., f32::INFINITY,
44210            f32::NEG_INFINITY, 200., -200., -64.,
44212        );
44213        assert_eq_m512(r, e);
44214    }
44215
44216    #[simd_test(enable = "avx512f")]
44217    unsafe fn test_mm512_mask_mul_ps() {
44218        #[rustfmt::skip]
44219        let a = _mm512_setr_ps(
44220            0., 1., -1., f32::MAX,
44221            f32::MIN, 100., -100., -32.,
44222            0., 1., -1., f32::MAX,
44223            f32::MIN, 100., -100., -32.,
44224        );
44225        let b = _mm512_set1_ps(2.);
44226        let r = _mm512_mask_mul_ps(a, 0, a, b);
44227        assert_eq_m512(r, a);
44228        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
44229        #[rustfmt::skip]
44230        let e = _mm512_setr_ps(
44231            0., 2., -2., f32::INFINITY,
44232            f32::NEG_INFINITY, 200., -200., -64.,
44233            0., 1., -1., f32::MAX,
44234            f32::MIN, 100., -100., -32.,
44235        );
44236        assert_eq_m512(r, e);
44237    }
44238
44239    #[simd_test(enable = "avx512f")]
44240    unsafe fn test_mm512_maskz_mul_ps() {
44241        #[rustfmt::skip]
44242        let a = _mm512_setr_ps(
44243            0., 1., -1., f32::MAX,
44244            f32::MIN, 100., -100., -32.,
44245            0., 1., -1., f32::MAX,
44246            f32::MIN, 100., -100., -32.,
44247        );
44248        let b = _mm512_set1_ps(2.);
44249        let r = _mm512_maskz_mul_ps(0, a, b);
44250        assert_eq_m512(r, _mm512_setzero_ps());
44251        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
44252        #[rustfmt::skip]
44253        let e = _mm512_setr_ps(
44254            0., 2., -2., f32::INFINITY,
44255            f32::NEG_INFINITY, 200., -200., -64.,
44256            0., 0., 0., 0.,
44257            0., 0., 0., 0.,
44258        );
44259        assert_eq_m512(r, e);
44260    }
44261
44262    #[simd_test(enable = "avx512f,avx512vl")]
44263    unsafe fn test_mm256_mask_mul_ps() {
44264        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44265        let b = _mm256_set1_ps(2.);
44266        let r = _mm256_mask_mul_ps(a, 0, a, b);
44267        assert_eq_m256(r, a);
44268        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
44269        #[rustfmt::skip]
44270        let e = _mm256_set_ps(
44271            0., 2., -2., f32::INFINITY,
44272            f32::NEG_INFINITY, 200., -200., -64.,
44273        );
44274        assert_eq_m256(r, e);
44275    }
44276
44277    #[simd_test(enable = "avx512f,avx512vl")]
44278    unsafe fn test_mm256_maskz_mul_ps() {
44279        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44280        let b = _mm256_set1_ps(2.);
44281        let r = _mm256_maskz_mul_ps(0, a, b);
44282        assert_eq_m256(r, _mm256_setzero_ps());
44283        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
44284        #[rustfmt::skip]
44285        let e = _mm256_set_ps(
44286            0., 2., -2., f32::INFINITY,
44287            f32::NEG_INFINITY, 200., -200., -64.,
44288        );
44289        assert_eq_m256(r, e);
44290    }
44291
44292    #[simd_test(enable = "avx512f,avx512vl")]
44293    unsafe fn test_mm_mask_mul_ps() {
44294        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44295        let b = _mm_set1_ps(2.);
44296        let r = _mm_mask_mul_ps(a, 0, a, b);
44297        assert_eq_m128(r, a);
44298        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
44299        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44300        assert_eq_m128(r, e);
44301    }
44302
44303    #[simd_test(enable = "avx512f,avx512vl")]
44304    unsafe fn test_mm_maskz_mul_ps() {
44305        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44306        let b = _mm_set1_ps(2.);
44307        let r = _mm_maskz_mul_ps(0, a, b);
44308        assert_eq_m128(r, _mm_setzero_ps());
44309        let r = _mm_maskz_mul_ps(0b00001111, a, b);
44310        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44311        assert_eq_m128(r, e);
44312    }
44313
44314    #[simd_test(enable = "avx512f")]
44315    unsafe fn test_mm512_div_ps() {
44316        let a = _mm512_setr_ps(
44317            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44318        );
44319        let b = _mm512_setr_ps(
44320            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44321        );
44322        let r = _mm512_div_ps(a, b);
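        // Lanes 5 and 12 divide by zero and become infinity and negative infinity respectively.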
44323        #[rustfmt::skip]
44324        let e = _mm512_setr_ps(
44325            0., 0.5, -0.5, -1.,
44326            50., f32::INFINITY, -50., -16.,
44327            0., 0.5, -0.5, 500.,
44328            f32::NEG_INFINITY, 50., -50., -16.,
44329        );
44330        assert_eq_m512(r, e);
44331    }
44332
44333    #[simd_test(enable = "avx512f")]
44334    unsafe fn test_mm512_mask_div_ps() {
44335        let a = _mm512_setr_ps(
44336            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44337        );
44338        let b = _mm512_setr_ps(
44339            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44340        );
44341        let r = _mm512_mask_div_ps(a, 0, a, b);
44342        assert_eq_m512(r, a);
44343        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
44344        #[rustfmt::skip]
44345        let e = _mm512_setr_ps(
44346            0., 0.5, -0.5, -1.,
44347            50., f32::INFINITY, -50., -16.,
44348            0., 1., -1., 1000.,
44349            -131., 100., -100., -32.,
44350        );
44351        assert_eq_m512(r, e);
44352    }
44353
44354    #[simd_test(enable = "avx512f")]
44355    unsafe fn test_mm512_maskz_div_ps() {
44356        let a = _mm512_setr_ps(
44357            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44358        );
44359        let b = _mm512_setr_ps(
44360            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44361        );
44362        let r = _mm512_maskz_div_ps(0, a, b);
44363        assert_eq_m512(r, _mm512_setzero_ps());
44364        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
44365        #[rustfmt::skip]
44366        let e = _mm512_setr_ps(
44367            0., 0.5, -0.5, -1.,
44368            50., f32::INFINITY, -50., -16.,
44369            0., 0., 0., 0.,
44370            0., 0., 0., 0.,
44371        );
44372        assert_eq_m512(r, e);
44373    }
44374
44375    #[simd_test(enable = "avx512f,avx512vl")]
44376    unsafe fn test_mm256_mask_div_ps() {
44377        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44378        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44379        let r = _mm256_mask_div_ps(a, 0, a, b);
44380        assert_eq_m256(r, a);
44381        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
44382        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44383        assert_eq_m256(r, e);
44384    }
44385
44386    #[simd_test(enable = "avx512f,avx512vl")]
44387    unsafe fn test_mm256_maskz_div_ps() {
44388        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44389        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44390        let r = _mm256_maskz_div_ps(0, a, b);
44391        assert_eq_m256(r, _mm256_setzero_ps());
44392        let r = _mm256_maskz_div_ps(0b11111111, a, b);
44393        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44394        assert_eq_m256(r, e);
44395    }
44396
44397    #[simd_test(enable = "avx512f,avx512vl")]
44398    unsafe fn test_mm_mask_div_ps() {
44399        let a = _mm_set_ps(100., 100., -100., -32.);
44400        let b = _mm_set_ps(2., 0., 2., 2.);
44401        let r = _mm_mask_div_ps(a, 0, a, b);
44402        assert_eq_m128(r, a);
44403        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
44404        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44405        assert_eq_m128(r, e);
44406    }
44407
44408    #[simd_test(enable = "avx512f,avx512vl")]
44409    unsafe fn test_mm_maskz_div_ps() {
44410        let a = _mm_set_ps(100., 100., -100., -32.);
44411        let b = _mm_set_ps(2., 0., 2., 2.);
44412        let r = _mm_maskz_div_ps(0, a, b);
44413        assert_eq_m128(r, _mm_setzero_ps());
44414        let r = _mm_maskz_div_ps(0b00001111, a, b);
44415        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44416        assert_eq_m128(r, e);
44417    }
44418
44419    #[simd_test(enable = "avx512f")]
44420    unsafe fn test_mm512_max_epi32() {
44421        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44422        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44423        let r = _mm512_max_epi32(a, b);
44424        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44425        assert_eq_m512i(r, e);
44426    }
44427
44428    #[simd_test(enable = "avx512f")]
44429    unsafe fn test_mm512_mask_max_epi32() {
44430        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44431        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44432        let r = _mm512_mask_max_epi32(a, 0, a, b);
44433        assert_eq_m512i(r, a);
44434        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
44435        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44436        assert_eq_m512i(r, e);
44437    }
44438
44439    #[simd_test(enable = "avx512f")]
44440    unsafe fn test_mm512_maskz_max_epi32() {
44441        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44442        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44443        let r = _mm512_maskz_max_epi32(0, a, b);
44444        assert_eq_m512i(r, _mm512_setzero_si512());
44445        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
44446        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44447        assert_eq_m512i(r, e);
44448    }
44449
44450    #[simd_test(enable = "avx512f,avx512vl")]
44451    unsafe fn test_mm256_mask_max_epi32() {
44452        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44453        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44454        let r = _mm256_mask_max_epi32(a, 0, a, b);
44455        assert_eq_m256i(r, a);
44456        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
44457        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44458        assert_eq_m256i(r, e);
44459    }
44460
44461    #[simd_test(enable = "avx512f,avx512vl")]
44462    unsafe fn test_mm256_maskz_max_epi32() {
44463        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44464        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44465        let r = _mm256_maskz_max_epi32(0, a, b);
44466        assert_eq_m256i(r, _mm256_setzero_si256());
44467        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
44468        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44469        assert_eq_m256i(r, e);
44470    }
44471
44472    #[simd_test(enable = "avx512f,avx512vl")]
44473    unsafe fn test_mm_mask_max_epi32() {
44474        let a = _mm_set_epi32(0, 1, 2, 3);
44475        let b = _mm_set_epi32(3, 2, 1, 0);
44476        let r = _mm_mask_max_epi32(a, 0, a, b);
44477        assert_eq_m128i(r, a);
44478        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
44479        let e = _mm_set_epi32(3, 2, 2, 3);
44480        assert_eq_m128i(r, e);
44481    }
44482
44483    #[simd_test(enable = "avx512f,avx512vl")]
44484    unsafe fn test_mm_maskz_max_epi32() {
44485        let a = _mm_set_epi32(0, 1, 2, 3);
44486        let b = _mm_set_epi32(3, 2, 1, 0);
44487        let r = _mm_maskz_max_epi32(0, a, b);
44488        assert_eq_m128i(r, _mm_setzero_si128());
44489        let r = _mm_maskz_max_epi32(0b00001111, a, b);
44490        let e = _mm_set_epi32(3, 2, 2, 3);
44491        assert_eq_m128i(r, e);
44492    }
44493
44494    #[simd_test(enable = "avx512f")]
44495    unsafe fn test_mm512_max_ps() {
44496        let a = _mm512_setr_ps(
44497            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44498        );
44499        let b = _mm512_setr_ps(
44500            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44501        );
44502        let r = _mm512_max_ps(a, b);
44503        let e = _mm512_setr_ps(
44504            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44505        );
44506        assert_eq_m512(r, e);
44507    }
44508
44509    #[simd_test(enable = "avx512f")]
44510    unsafe fn test_mm512_mask_max_ps() {
44511        let a = _mm512_setr_ps(
44512            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44513        );
44514        let b = _mm512_setr_ps(
44515            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44516        );
44517        let r = _mm512_mask_max_ps(a, 0, a, b);
44518        assert_eq_m512(r, a);
44519        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
44520        let e = _mm512_setr_ps(
44521            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44522        );
44523        assert_eq_m512(r, e);
44524    }
44525
44526    #[simd_test(enable = "avx512f")]
44527    unsafe fn test_mm512_maskz_max_ps() {
44528        let a = _mm512_setr_ps(
44529            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44530        );
44531        let b = _mm512_setr_ps(
44532            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44533        );
44534        let r = _mm512_maskz_max_ps(0, a, b);
44535        assert_eq_m512(r, _mm512_setzero_ps());
44536        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
44537        let e = _mm512_setr_ps(
44538            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44539        );
44540        assert_eq_m512(r, e);
44541    }
44542
44543    #[simd_test(enable = "avx512f,avx512vl")]
44544    unsafe fn test_mm256_mask_max_ps() {
44545        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44546        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44547        let r = _mm256_mask_max_ps(a, 0, a, b);
44548        assert_eq_m256(r, a);
44549        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
44550        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44551        assert_eq_m256(r, e);
44552    }
44553
44554    #[simd_test(enable = "avx512f,avx512vl")]
44555    unsafe fn test_mm256_maskz_max_ps() {
44556        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44557        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44558        let r = _mm256_maskz_max_ps(0, a, b);
44559        assert_eq_m256(r, _mm256_setzero_ps());
44560        let r = _mm256_maskz_max_ps(0b11111111, a, b);
44561        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44562        assert_eq_m256(r, e);
44563    }
44564
44565    #[simd_test(enable = "avx512f,avx512vl")]
44566    unsafe fn test_mm_mask_max_ps() {
44567        let a = _mm_set_ps(0., 1., 2., 3.);
44568        let b = _mm_set_ps(3., 2., 1., 0.);
44569        let r = _mm_mask_max_ps(a, 0, a, b);
44570        assert_eq_m128(r, a);
44571        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44572        let e = _mm_set_ps(3., 2., 2., 3.);
44573        assert_eq_m128(r, e);
44574    }
44575
44576    #[simd_test(enable = "avx512f,avx512vl")]
44577    unsafe fn test_mm_maskz_max_ps() {
44578        let a = _mm_set_ps(0., 1., 2., 3.);
44579        let b = _mm_set_ps(3., 2., 1., 0.);
44580        let r = _mm_maskz_max_ps(0, a, b);
44581        assert_eq_m128(r, _mm_setzero_ps());
44582        let r = _mm_maskz_max_ps(0b00001111, a, b);
44583        let e = _mm_set_ps(3., 2., 2., 3.);
44584        assert_eq_m128(r, e);
44585    }
44586
44587    #[simd_test(enable = "avx512f")]
44588    unsafe fn test_mm512_max_epu32() {
44589        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44590        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44591        let r = _mm512_max_epu32(a, b);
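        // All inputs are non-negative, so the unsigned maximum agrees with the signed variant above.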
44592        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44593        assert_eq_m512i(r, e);
44594    }
44595
44596    #[simd_test(enable = "avx512f")]
44597    unsafe fn test_mm512_mask_max_epu32() {
44598        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44599        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44600        let r = _mm512_mask_max_epu32(a, 0, a, b);
44601        assert_eq_m512i(r, a);
44602        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
44603        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44604        assert_eq_m512i(r, e);
44605    }
44606
44607    #[simd_test(enable = "avx512f")]
44608    unsafe fn test_mm512_maskz_max_epu32() {
44609        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44610        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44611        let r = _mm512_maskz_max_epu32(0, a, b);
44612        assert_eq_m512i(r, _mm512_setzero_si512());
44613        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
44614        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44615        assert_eq_m512i(r, e);
44616    }
44617
44618    #[simd_test(enable = "avx512f,avx512vl")]
44619    unsafe fn test_mm256_mask_max_epu32() {
44620        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44621        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44622        let r = _mm256_mask_max_epu32(a, 0, a, b);
44623        assert_eq_m256i(r, a);
44624        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
44625        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44626        assert_eq_m256i(r, e);
44627    }
44628
44629    #[simd_test(enable = "avx512f,avx512vl")]
44630    unsafe fn test_mm256_maskz_max_epu32() {
44631        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44632        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44633        let r = _mm256_maskz_max_epu32(0, a, b);
44634        assert_eq_m256i(r, _mm256_setzero_si256());
44635        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
44636        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44637        assert_eq_m256i(r, e);
44638    }
44639
44640    #[simd_test(enable = "avx512f,avx512vl")]
44641    unsafe fn test_mm_mask_max_epu32() {
44642        let a = _mm_set_epi32(0, 1, 2, 3);
44643        let b = _mm_set_epi32(3, 2, 1, 0);
44644        let r = _mm_mask_max_epu32(a, 0, a, b);
44645        assert_eq_m128i(r, a);
44646        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
44647        let e = _mm_set_epi32(3, 2, 2, 3);
44648        assert_eq_m128i(r, e);
44649    }
44650
44651    #[simd_test(enable = "avx512f,avx512vl")]
44652    unsafe fn test_mm_maskz_max_epu32() {
44653        let a = _mm_set_epi32(0, 1, 2, 3);
44654        let b = _mm_set_epi32(3, 2, 1, 0);
44655        let r = _mm_maskz_max_epu32(0, a, b);
44656        assert_eq_m128i(r, _mm_setzero_si128());
44657        let r = _mm_maskz_max_epu32(0b00001111, a, b);
44658        let e = _mm_set_epi32(3, 2, 2, 3);
44659        assert_eq_m128i(r, e);
44660    }
44661
44662    #[simd_test(enable = "avx512f")]
44663    unsafe fn test_mm512_min_epi32() {
44664        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44665        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44666        let r = _mm512_min_epi32(a, b);
44667        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44668        assert_eq_m512i(r, e);
44669    }
44670
44671    #[simd_test(enable = "avx512f")]
44672    unsafe fn test_mm512_mask_min_epi32() {
44673        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44674        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44675        let r = _mm512_mask_min_epi32(a, 0, a, b);
44676        assert_eq_m512i(r, a);
44677        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
44678        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44679        assert_eq_m512i(r, e);
44680    }
44681
44682    #[simd_test(enable = "avx512f")]
44683    unsafe fn test_mm512_maskz_min_epi32() {
44684        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44685        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44686        let r = _mm512_maskz_min_epi32(0, a, b);
44687        assert_eq_m512i(r, _mm512_setzero_si512());
44688        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
44689        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44690        assert_eq_m512i(r, e);
44691    }
44692
44693    #[simd_test(enable = "avx512f,avx512vl")]
44694    unsafe fn test_mm256_mask_min_epi32() {
44695        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44696        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44697        let r = _mm256_mask_min_epi32(a, 0, a, b);
44698        assert_eq_m256i(r, a);
44699        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
44700        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44701        assert_eq_m256i(r, e);
44702    }
44703
44704    #[simd_test(enable = "avx512f,avx512vl")]
44705    unsafe fn test_mm256_maskz_min_epi32() {
44706        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44707        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44708        let r = _mm256_maskz_min_epi32(0, a, b);
44709        assert_eq_m256i(r, _mm256_setzero_si256());
44710        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
44711        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44712        assert_eq_m256i(r, e);
44713    }
44714
44715    #[simd_test(enable = "avx512f,avx512vl")]
44716    unsafe fn test_mm_mask_min_epi32() {
44717        let a = _mm_set_epi32(0, 1, 2, 3);
44718        let b = _mm_set_epi32(3, 2, 1, 0);
44719        let r = _mm_mask_min_epi32(a, 0, a, b);
44720        assert_eq_m128i(r, a);
44721        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
44722        let e = _mm_set_epi32(0, 1, 1, 0);
44723        assert_eq_m128i(r, e);
44724    }
44725
44726    #[simd_test(enable = "avx512f,avx512vl")]
44727    unsafe fn test_mm_maskz_min_epi32() {
44728        let a = _mm_set_epi32(0, 1, 2, 3);
44729        let b = _mm_set_epi32(3, 2, 1, 0);
44730        let r = _mm_maskz_min_epi32(0, a, b);
44731        assert_eq_m128i(r, _mm_setzero_si128());
44732        let r = _mm_maskz_min_epi32(0b00001111, a, b);
44733        let e = _mm_set_epi32(0, 1, 1, 0);
44734        assert_eq_m128i(r, e);
44735    }
44736
44737    #[simd_test(enable = "avx512f")]
44738    unsafe fn test_mm512_min_ps() {
44739        let a = _mm512_setr_ps(
44740            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44741        );
44742        let b = _mm512_setr_ps(
44743            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44744        );
44745        let r = _mm512_min_ps(a, b);
44746        let e = _mm512_setr_ps(
44747            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
44748        );
44749        assert_eq_m512(r, e);
44750    }
44751
44752    #[simd_test(enable = "avx512f")]
44753    unsafe fn test_mm512_mask_min_ps() {
44754        let a = _mm512_setr_ps(
44755            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44756        );
44757        let b = _mm512_setr_ps(
44758            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44759        );
44760        let r = _mm512_mask_min_ps(a, 0, a, b);
44761        assert_eq_m512(r, a);
44762        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
44763        let e = _mm512_setr_ps(
44764            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44765        );
44766        assert_eq_m512(r, e);
44767    }
44768
44769    #[simd_test(enable = "avx512f")]
44770    unsafe fn test_mm512_maskz_min_ps() {
44771        let a = _mm512_setr_ps(
44772            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44773        );
44774        let b = _mm512_setr_ps(
44775            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44776        );
44777        let r = _mm512_maskz_min_ps(0, a, b);
44778        assert_eq_m512(r, _mm512_setzero_ps());
44779        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
44780        let e = _mm512_setr_ps(
44781            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44782        );
44783        assert_eq_m512(r, e);
44784    }
44785
44786    #[simd_test(enable = "avx512f,avx512vl")]
44787    unsafe fn test_mm256_mask_min_ps() {
44788        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44789        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44790        let r = _mm256_mask_min_ps(a, 0, a, b);
44791        assert_eq_m256(r, a);
44792        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
44793        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44794        assert_eq_m256(r, e);
44795    }
44796
44797    #[simd_test(enable = "avx512f,avx512vl")]
44798    unsafe fn test_mm256_maskz_min_ps() {
44799        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44800        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44801        let r = _mm256_maskz_min_ps(0, a, b);
44802        assert_eq_m256(r, _mm256_setzero_ps());
44803        let r = _mm256_maskz_min_ps(0b11111111, a, b);
44804        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44805        assert_eq_m256(r, e);
44806    }
44807
44808    #[simd_test(enable = "avx512f,avx512vl")]
44809    unsafe fn test_mm_mask_min_ps() {
44810        let a = _mm_set_ps(0., 1., 2., 3.);
44811        let b = _mm_set_ps(3., 2., 1., 0.);
44812        let r = _mm_mask_min_ps(a, 0, a, b);
44813        assert_eq_m128(r, a);
44814        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
44815        let e = _mm_set_ps(0., 1., 1., 0.);
44816        assert_eq_m128(r, e);
44817    }
44818
44819    #[simd_test(enable = "avx512f,avx512vl")]
44820    unsafe fn test_mm_maskz_min_ps() {
44821        let a = _mm_set_ps(0., 1., 2., 3.);
44822        let b = _mm_set_ps(3., 2., 1., 0.);
44823        let r = _mm_maskz_min_ps(0, a, b);
44824        assert_eq_m128(r, _mm_setzero_ps());
44825        let r = _mm_maskz_min_ps(0b00001111, a, b);
44826        let e = _mm_set_ps(0., 1., 1., 0.);
44827        assert_eq_m128(r, e);
44828    }
44829
44830    #[simd_test(enable = "avx512f")]
44831    unsafe fn test_mm512_min_epu32() {
44832        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44833        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44834        let r = _mm512_min_epu32(a, b);
44835        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44836        assert_eq_m512i(r, e);
44837    }
44838
44839    #[simd_test(enable = "avx512f")]
44840    unsafe fn test_mm512_mask_min_epu32() {
44841        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44842        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44843        let r = _mm512_mask_min_epu32(a, 0, a, b);
44844        assert_eq_m512i(r, a);
44845        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
44846        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44847        assert_eq_m512i(r, e);
44848    }
44849
44850    #[simd_test(enable = "avx512f")]
44851    unsafe fn test_mm512_maskz_min_epu32() {
44852        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44853        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44854        let r = _mm512_maskz_min_epu32(0, a, b);
44855        assert_eq_m512i(r, _mm512_setzero_si512());
44856        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
44857        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44858        assert_eq_m512i(r, e);
44859    }
44860
44861    #[simd_test(enable = "avx512f,avx512vl")]
44862    unsafe fn test_mm256_mask_min_epu32() {
44863        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44864        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44865        let r = _mm256_mask_min_epu32(a, 0, a, b);
44866        assert_eq_m256i(r, a);
44867        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
44868        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44869        assert_eq_m256i(r, e);
44870    }
44871
44872    #[simd_test(enable = "avx512f,avx512vl")]
44873    unsafe fn test_mm256_maskz_min_epu32() {
44874        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44875        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44876        let r = _mm256_maskz_min_epu32(0, a, b);
44877        assert_eq_m256i(r, _mm256_setzero_si256());
44878        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
44879        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44880        assert_eq_m256i(r, e);
44881    }
44882
44883    #[simd_test(enable = "avx512f,avx512vl")]
44884    unsafe fn test_mm_mask_min_epu32() {
44885        let a = _mm_set_epi32(0, 1, 2, 3);
44886        let b = _mm_set_epi32(3, 2, 1, 0);
44887        let r = _mm_mask_min_epu32(a, 0, a, b);
44888        assert_eq_m128i(r, a);
44889        let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
44890        let e = _mm_set_epi32(0, 1, 1, 0);
44891        assert_eq_m128i(r, e);
44892    }
44893
44894    #[simd_test(enable = "avx512f,avx512vl")]
44895    unsafe fn test_mm_maskz_min_epu32() {
44896        let a = _mm_set_epi32(0, 1, 2, 3);
44897        let b = _mm_set_epi32(3, 2, 1, 0);
44898        let r = _mm_maskz_min_epu32(0, a, b);
44899        assert_eq_m128i(r, _mm_setzero_si128());
44900        let r = _mm_maskz_min_epu32(0b00001111, a, b);
44901        let e = _mm_set_epi32(0, 1, 1, 0);
44902        assert_eq_m128i(r, e);
44903    }
44904
44905    #[simd_test(enable = "avx512f")]
44906    unsafe fn test_mm512_sqrt_ps() {
44907        let a = _mm512_setr_ps(
44908            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44909        );
44910        let r = _mm512_sqrt_ps(a);
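        // Every input is a perfect square, so each square root is exact.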
44911        let e = _mm512_setr_ps(
44912            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44913        );
44914        assert_eq_m512(r, e);
44915    }
44916
44917    #[simd_test(enable = "avx512f")]
44918    unsafe fn test_mm512_mask_sqrt_ps() {
44919        let a = _mm512_setr_ps(
44920            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44921        );
44922        let r = _mm512_mask_sqrt_ps(a, 0, a);
44923        assert_eq_m512(r, a);
44924        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
44925        let e = _mm512_setr_ps(
44926            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
44927        );
44928        assert_eq_m512(r, e);
44929    }
44930
44931    #[simd_test(enable = "avx512f")]
44932    unsafe fn test_mm512_maskz_sqrt_ps() {
44933        let a = _mm512_setr_ps(
44934            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44935        );
44936        let r = _mm512_maskz_sqrt_ps(0, a);
44937        assert_eq_m512(r, _mm512_setzero_ps());
44938        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
44939        let e = _mm512_setr_ps(
44940            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44941        );
44942        assert_eq_m512(r, e);
44943    }
44944
44945    #[simd_test(enable = "avx512f,avx512vl")]
44946    unsafe fn test_mm256_mask_sqrt_ps() {
44947        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44948        let r = _mm256_mask_sqrt_ps(a, 0, a);
44949        assert_eq_m256(r, a);
44950        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
44951        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44952        assert_eq_m256(r, e);
44953    }
44954
44955    #[simd_test(enable = "avx512f,avx512vl")]
44956    unsafe fn test_mm256_maskz_sqrt_ps() {
44957        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44958        let r = _mm256_maskz_sqrt_ps(0, a);
44959        assert_eq_m256(r, _mm256_setzero_ps());
44960        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
44961        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44962        assert_eq_m256(r, e);
44963    }
44964
44965    #[simd_test(enable = "avx512f,avx512vl")]
44966    unsafe fn test_mm_mask_sqrt_ps() {
44967        let a = _mm_set_ps(0., 1., 4., 9.);
44968        let r = _mm_mask_sqrt_ps(a, 0, a);
44969        assert_eq_m128(r, a);
44970        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
44971        let e = _mm_set_ps(0., 1., 2., 3.);
44972        assert_eq_m128(r, e);
44973    }
44974
44975    #[simd_test(enable = "avx512f,avx512vl")]
44976    unsafe fn test_mm_maskz_sqrt_ps() {
44977        let a = _mm_set_ps(0., 1., 4., 9.);
44978        let r = _mm_maskz_sqrt_ps(0, a);
44979        assert_eq_m128(r, _mm_setzero_ps());
44980        let r = _mm_maskz_sqrt_ps(0b00001111, a);
44981        let e = _mm_set_ps(0., 1., 2., 3.);
44982        assert_eq_m128(r, e);
44983    }
44984
44985    #[simd_test(enable = "avx512f")]
44986    unsafe fn test_mm512_fmadd_ps() {
44987        let a = _mm512_set1_ps(1.);
44988        let b = _mm512_setr_ps(
44989            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44990        );
44991        let c = _mm512_set1_ps(1.);
44992        let r = _mm512_fmadd_ps(a, b, c);
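        // Fused multiply-add computes a * b + c per lane with a single rounding step.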
44993        let e = _mm512_setr_ps(
44994            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
44995        );
44996        assert_eq_m512(r, e);
44997    }
44998
44999    #[simd_test(enable = "avx512f")]
45000    unsafe fn test_mm512_mask_fmadd_ps() {
45001        let a = _mm512_set1_ps(1.);
45002        let b = _mm512_setr_ps(
45003            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45004        );
45005        let c = _mm512_set1_ps(1.);
45006        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
45007        assert_eq_m512(r, a);
45008        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
45009        let e = _mm512_setr_ps(
45010            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45011        );
45012        assert_eq_m512(r, e);
45013    }
45014
45015    #[simd_test(enable = "avx512f")]
45016    unsafe fn test_mm512_maskz_fmadd_ps() {
45017        let a = _mm512_set1_ps(1.);
45018        let b = _mm512_setr_ps(
45019            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45020        );
45021        let c = _mm512_set1_ps(1.);
45022        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
45023        assert_eq_m512(r, _mm512_setzero_ps());
45024        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
45025        let e = _mm512_setr_ps(
45026            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45027        );
45028        assert_eq_m512(r, e);
45029    }
45030
45031    #[simd_test(enable = "avx512f")]
45032    unsafe fn test_mm512_mask3_fmadd_ps() {
45033        let a = _mm512_set1_ps(1.);
45034        let b = _mm512_setr_ps(
45035            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45036        );
45037        let c = _mm512_set1_ps(2.);
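        // mask3 variants take the writemask last and copy from c (not from a) where a mask bit is clear.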
45038        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
45039        assert_eq_m512(r, c);
45040        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
45041        let e = _mm512_setr_ps(
45042            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
45043        );
45044        assert_eq_m512(r, e);
45045    }
45046
45047    #[simd_test(enable = "avx512f,avx512vl")]
45048    unsafe fn test_mm256_mask_fmadd_ps() {
45049        let a = _mm256_set1_ps(1.);
45050        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45051        let c = _mm256_set1_ps(1.);
45052        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
45053        assert_eq_m256(r, a);
45054        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
45055        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45056        assert_eq_m256(r, e);
45057    }
45058
45059    #[simd_test(enable = "avx512f,avx512vl")]
45060    unsafe fn test_mm256_maskz_fmadd_ps() {
45061        let a = _mm256_set1_ps(1.);
45062        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45063        let c = _mm256_set1_ps(1.);
45064        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
45065        assert_eq_m256(r, _mm256_setzero_ps());
45066        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
45067        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45068        assert_eq_m256(r, e);
45069    }
45070
45071    #[simd_test(enable = "avx512f,avx512vl")]
45072    unsafe fn test_mm256_mask3_fmadd_ps() {
45073        let a = _mm256_set1_ps(1.);
45074        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45075        let c = _mm256_set1_ps(1.);
45076        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
45077        assert_eq_m256(r, c);
45078        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
45079        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45080        assert_eq_m256(r, e);
45081    }
45082
45083    #[simd_test(enable = "avx512f,avx512vl")]
45084    unsafe fn test_mm_mask_fmadd_ps() {
45085        let a = _mm_set1_ps(1.);
45086        let b = _mm_set_ps(0., 1., 2., 3.);
45087        let c = _mm_set1_ps(1.);
45088        let r = _mm_mask_fmadd_ps(a, 0, b, c);
45089        assert_eq_m128(r, a);
45090        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
45091        let e = _mm_set_ps(1., 2., 3., 4.);
45092        assert_eq_m128(r, e);
45093    }
45094
45095    #[simd_test(enable = "avx512f,avx512vl")]
45096    unsafe fn test_mm_maskz_fmadd_ps() {
45097        let a = _mm_set1_ps(1.);
45098        let b = _mm_set_ps(0., 1., 2., 3.);
45099        let c = _mm_set1_ps(1.);
45100        let r = _mm_maskz_fmadd_ps(0, a, b, c);
45101        assert_eq_m128(r, _mm_setzero_ps());
45102        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
45103        let e = _mm_set_ps(1., 2., 3., 4.);
45104        assert_eq_m128(r, e);
45105    }
45106
45107    #[simd_test(enable = "avx512f,avx512vl")]
45108    unsafe fn test_mm_mask3_fmadd_ps() {
45109        let a = _mm_set1_ps(1.);
45110        let b = _mm_set_ps(0., 1., 2., 3.);
45111        let c = _mm_set1_ps(1.);
45112        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
45113        assert_eq_m128(r, c);
45114        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
45115        let e = _mm_set_ps(1., 2., 3., 4.);
45116        assert_eq_m128(r, e);
45117    }
45118
45119    #[simd_test(enable = "avx512f")]
45120    unsafe fn test_mm512_fmsub_ps() {
45121        let a = _mm512_setr_ps(
45122            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45123        );
45124        let b = _mm512_setr_ps(
45125            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45126        );
45127        let c = _mm512_setr_ps(
45128            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45129        );
45130        let r = _mm512_fmsub_ps(a, b, c);
45131        let e = _mm512_setr_ps(
45132            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
45133        );
45134        assert_eq_m512(r, e);
45135    }
45136
45137    #[simd_test(enable = "avx512f")]
45138    unsafe fn test_mm512_mask_fmsub_ps() {
45139        let a = _mm512_set1_ps(1.);
45140        let b = _mm512_setr_ps(
45141            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45142        );
45143        let c = _mm512_set1_ps(1.);
45144        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
45145        assert_eq_m512(r, a);
45146        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
45147        let e = _mm512_setr_ps(
45148            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45149        );
45150        assert_eq_m512(r, e);
45151    }
45152
45153    #[simd_test(enable = "avx512f")]
45154    unsafe fn test_mm512_maskz_fmsub_ps() {
45155        let a = _mm512_set1_ps(1.);
45156        let b = _mm512_setr_ps(
45157            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45158        );
45159        let c = _mm512_set1_ps(1.);
45160        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
45161        assert_eq_m512(r, _mm512_setzero_ps());
45162        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
45163        let e = _mm512_setr_ps(
45164            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45165        );
45166        assert_eq_m512(r, e);
45167    }
45168
45169    #[simd_test(enable = "avx512f")]
45170    unsafe fn test_mm512_mask3_fmsub_ps() {
45171        let a = _mm512_set1_ps(1.);
45172        let b = _mm512_setr_ps(
45173            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45174        );
45175        let c = _mm512_setr_ps(
45176            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45177        );
45178        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
45179        assert_eq_m512(r, c);
45180        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
45181        let e = _mm512_setr_ps(
45182            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45183        );
45184        assert_eq_m512(r, e);
45185    }
45186
45187    #[simd_test(enable = "avx512f,avx512vl")]
45188    unsafe fn test_mm256_mask_fmsub_ps() {
45189        let a = _mm256_set1_ps(1.);
45190        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45191        let c = _mm256_set1_ps(1.);
45192        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
45193        assert_eq_m256(r, a);
45194        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
45195        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45196        assert_eq_m256(r, e);
45197    }
45198
45199    #[simd_test(enable = "avx512f,avx512vl")]
45200    unsafe fn test_mm256_maskz_fmsub_ps() {
45201        let a = _mm256_set1_ps(1.);
45202        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45203        let c = _mm256_set1_ps(1.);
45204        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
45205        assert_eq_m256(r, _mm256_setzero_ps());
45206        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
45207        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45208        assert_eq_m256(r, e);
45209    }
45210
45211    #[simd_test(enable = "avx512f,avx512vl")]
45212    unsafe fn test_mm256_mask3_fmsub_ps() {
45213        let a = _mm256_set1_ps(1.);
45214        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45215        let c = _mm256_set1_ps(1.);
45216        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
45217        assert_eq_m256(r, c);
45218        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
45219        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45220        assert_eq_m256(r, e);
45221    }
45222
45223    #[simd_test(enable = "avx512f,avx512vl")]
45224    unsafe fn test_mm_mask_fmsub_ps() {
45225        let a = _mm_set1_ps(1.);
45226        let b = _mm_set_ps(0., 1., 2., 3.);
45227        let c = _mm_set1_ps(1.);
45228        let r = _mm_mask_fmsub_ps(a, 0, b, c);
45229        assert_eq_m128(r, a);
45230        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
45231        let e = _mm_set_ps(-1., 0., 1., 2.);
45232        assert_eq_m128(r, e);
45233    }
45234
45235    #[simd_test(enable = "avx512f,avx512vl")]
45236    unsafe fn test_mm_maskz_fmsub_ps() {
45237        let a = _mm_set1_ps(1.);
45238        let b = _mm_set_ps(0., 1., 2., 3.);
45239        let c = _mm_set1_ps(1.);
45240        let r = _mm_maskz_fmsub_ps(0, a, b, c);
45241        assert_eq_m128(r, _mm_setzero_ps());
45242        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
45243        let e = _mm_set_ps(-1., 0., 1., 2.);
45244        assert_eq_m128(r, e);
45245    }
45246
45247    #[simd_test(enable = "avx512f,avx512vl")]
45248    unsafe fn test_mm_mask3_fmsub_ps() {
45249        let a = _mm_set1_ps(1.);
45250        let b = _mm_set_ps(0., 1., 2., 3.);
45251        let c = _mm_set1_ps(1.);
45252        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
45253        assert_eq_m128(r, c);
45254        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
45255        let e = _mm_set_ps(-1., 0., 1., 2.);
45256        assert_eq_m128(r, e);
45257    }
45258
45259    #[simd_test(enable = "avx512f")]
45260    unsafe fn test_mm512_fmaddsub_ps() {
45261        let a = _mm512_set1_ps(1.);
45262        let b = _mm512_setr_ps(
45263            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45264        );
45265        let c = _mm512_set1_ps(1.);
45266        let r = _mm512_fmaddsub_ps(a, b, c);
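        // fmaddsub subtracts c in even-indexed lanes and adds c in odd-indexed lanes.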
45267        let e = _mm512_setr_ps(
45268            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
45269        );
45270        assert_eq_m512(r, e);
45271    }
45272
45273    #[simd_test(enable = "avx512f")]
45274    unsafe fn test_mm512_mask_fmaddsub_ps() {
45275        let a = _mm512_set1_ps(1.);
45276        let b = _mm512_setr_ps(
45277            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45278        );
45279        let c = _mm512_set1_ps(1.);
45280        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
45281        assert_eq_m512(r, a);
45282        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
45283        let e = _mm512_setr_ps(
45284            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45285        );
45286        assert_eq_m512(r, e);
45287    }
45288
45289    #[simd_test(enable = "avx512f")]
45290    unsafe fn test_mm512_maskz_fmaddsub_ps() {
45291        let a = _mm512_set1_ps(1.);
45292        let b = _mm512_setr_ps(
45293            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45294        );
45295        let c = _mm512_set1_ps(1.);
45296        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
45297        assert_eq_m512(r, _mm512_setzero_ps());
45298        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
45299        let e = _mm512_setr_ps(
45300            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45301        );
45302        assert_eq_m512(r, e);
45303    }
45304
45305    #[simd_test(enable = "avx512f")]
45306    unsafe fn test_mm512_mask3_fmaddsub_ps() {
45307        let a = _mm512_set1_ps(1.);
45308        let b = _mm512_setr_ps(
45309            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45310        );
45311        let c = _mm512_setr_ps(
45312            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45313        );
45314        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
45315        assert_eq_m512(r, c);
45316        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
45317        let e = _mm512_setr_ps(
45318            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
45319        );
45320        assert_eq_m512(r, e);
45321    }
45322
45323    #[simd_test(enable = "avx512f,avx512vl")]
45324    unsafe fn test_mm256_mask_fmaddsub_ps() {
45325        let a = _mm256_set1_ps(1.);
45326        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45327        let c = _mm256_set1_ps(1.);
45328        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
45329        assert_eq_m256(r, a);
45330        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
45331        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45332        assert_eq_m256(r, e);
45333    }
45334
45335    #[simd_test(enable = "avx512f,avx512vl")]
45336    unsafe fn test_mm256_maskz_fmaddsub_ps() {
45337        let a = _mm256_set1_ps(1.);
45338        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45339        let c = _mm256_set1_ps(1.);
45340        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
45341        assert_eq_m256(r, _mm256_setzero_ps());
45342        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
45343        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45344        assert_eq_m256(r, e);
45345    }
45346
45347    #[simd_test(enable = "avx512f,avx512vl")]
45348    unsafe fn test_mm256_mask3_fmaddsub_ps() {
45349        let a = _mm256_set1_ps(1.);
45350        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45351        let c = _mm256_set1_ps(1.);
45352        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
45353        assert_eq_m256(r, c);
45354        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
45355        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45356        assert_eq_m256(r, e);
45357    }
45358
45359    #[simd_test(enable = "avx512f,avx512vl")]
45360    unsafe fn test_mm_mask_fmaddsub_ps() {
45361        let a = _mm_set1_ps(1.);
45362        let b = _mm_set_ps(0., 1., 2., 3.);
45363        let c = _mm_set1_ps(1.);
45364        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
45365        assert_eq_m128(r, a);
45366        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
45367        let e = _mm_set_ps(1., 0., 3., 2.);
45368        assert_eq_m128(r, e);
45369    }
45370
45371    #[simd_test(enable = "avx512f,avx512vl")]
45372    unsafe fn test_mm_maskz_fmaddsub_ps() {
45373        let a = _mm_set1_ps(1.);
45374        let b = _mm_set_ps(0., 1., 2., 3.);
45375        let c = _mm_set1_ps(1.);
45376        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
45377        assert_eq_m128(r, _mm_setzero_ps());
45378        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
45379        let e = _mm_set_ps(1., 0., 3., 2.);
45380        assert_eq_m128(r, e);
45381    }
45382
45383    #[simd_test(enable = "avx512f,avx512vl")]
45384    unsafe fn test_mm_mask3_fmaddsub_ps() {
45385        let a = _mm_set1_ps(1.);
45386        let b = _mm_set_ps(0., 1., 2., 3.);
45387        let c = _mm_set1_ps(1.);
45388        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
45389        assert_eq_m128(r, c);
45390        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
45391        let e = _mm_set_ps(1., 0., 3., 2.);
45392        assert_eq_m128(r, e);
45393    }
45394
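    // _mm512_fmsubadd_ps is the mirror of fmaddsub: even-indexed lanes compute
    // a * b + c, odd-indexed lanes compute a * b - c.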
45395    #[simd_test(enable = "avx512f")]
45396    unsafe fn test_mm512_fmsubadd_ps() {
45397        let a = _mm512_setr_ps(
45398            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45399        );
45400        let b = _mm512_setr_ps(
45401            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45402        );
45403        let c = _mm512_setr_ps(
45404            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45405        );
45406        let r = _mm512_fmsubadd_ps(a, b, c);
45407        let e = _mm512_setr_ps(
45408            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
45409        );
45410        assert_eq_m512(r, e);
45411    }
45412
45413    #[simd_test(enable = "avx512f")]
45414    unsafe fn test_mm512_mask_fmsubadd_ps() {
45415        let a = _mm512_set1_ps(1.);
45416        let b = _mm512_setr_ps(
45417            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45418        );
45419        let c = _mm512_set1_ps(1.);
45420        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
45421        assert_eq_m512(r, a);
45422        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
45423        let e = _mm512_setr_ps(
45424            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45425        );
45426        assert_eq_m512(r, e);
45427    }
45428
45429    #[simd_test(enable = "avx512f")]
45430    unsafe fn test_mm512_maskz_fmsubadd_ps() {
45431        let a = _mm512_set1_ps(1.);
45432        let b = _mm512_setr_ps(
45433            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45434        );
45435        let c = _mm512_set1_ps(1.);
45436        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
45437        assert_eq_m512(r, _mm512_setzero_ps());
45438        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
45439        let e = _mm512_setr_ps(
45440            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45441        );
45442        assert_eq_m512(r, e);
45443    }
45444
45445    #[simd_test(enable = "avx512f")]
45446    unsafe fn test_mm512_mask3_fmsubadd_ps() {
45447        let a = _mm512_set1_ps(1.);
45448        let b = _mm512_setr_ps(
45449            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45450        );
45451        let c = _mm512_setr_ps(
45452            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45453        );
45454        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
45455        assert_eq_m512(r, c);
45456        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
45457        let e = _mm512_setr_ps(
45458            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45459        );
45460        assert_eq_m512(r, e);
45461    }
45462
45463    #[simd_test(enable = "avx512f,avx512vl")]
45464    unsafe fn test_mm256_mask_fmsubadd_ps() {
45465        let a = _mm256_set1_ps(1.);
45466        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45467        let c = _mm256_set1_ps(1.);
45468        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
45469        assert_eq_m256(r, a);
45470        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
45471        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45472        assert_eq_m256(r, e);
45473    }
45474
45475    #[simd_test(enable = "avx512f,avx512vl")]
45476    unsafe fn test_mm256_maskz_fmsubadd_ps() {
45477        let a = _mm256_set1_ps(1.);
45478        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45479        let c = _mm256_set1_ps(1.);
45480        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
45481        assert_eq_m256(r, _mm256_setzero_ps());
45482        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
45483        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45484        assert_eq_m256(r, e);
45485    }
45486
45487    #[simd_test(enable = "avx512f,avx512vl")]
45488    unsafe fn test_mm256_mask3_fmsubadd_ps() {
45489        let a = _mm256_set1_ps(1.);
45490        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45491        let c = _mm256_set1_ps(1.);
45492        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
45493        assert_eq_m256(r, c);
45494        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
45495        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45496        assert_eq_m256(r, e);
45497    }
45498
45499    #[simd_test(enable = "avx512f,avx512vl")]
45500    unsafe fn test_mm_mask_fmsubadd_ps() {
45501        let a = _mm_set1_ps(1.);
45502        let b = _mm_set_ps(0., 1., 2., 3.);
45503        let c = _mm_set1_ps(1.);
45504        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
45505        assert_eq_m128(r, a);
45506        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
45507        let e = _mm_set_ps(-1., 2., 1., 4.);
45508        assert_eq_m128(r, e);
45509    }
45510
45511    #[simd_test(enable = "avx512f,avx512vl")]
45512    unsafe fn test_mm_maskz_fmsubadd_ps() {
45513        let a = _mm_set1_ps(1.);
45514        let b = _mm_set_ps(0., 1., 2., 3.);
45515        let c = _mm_set1_ps(1.);
45516        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
45517        assert_eq_m128(r, _mm_setzero_ps());
45518        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
45519        let e = _mm_set_ps(-1., 2., 1., 4.);
45520        assert_eq_m128(r, e);
45521    }
45522
45523    #[simd_test(enable = "avx512f,avx512vl")]
45524    unsafe fn test_mm_mask3_fmsubadd_ps() {
45525        let a = _mm_set1_ps(1.);
45526        let b = _mm_set_ps(0., 1., 2., 3.);
45527        let c = _mm_set1_ps(1.);
45528        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
45529        assert_eq_m128(r, c);
45530        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
45531        let e = _mm_set_ps(-1., 2., 1., 4.);
45532        assert_eq_m128(r, e);
45533    }
45534
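    // fnmadd computes -(a * b) + c, so with a == 1 and c == 1 each lane is 1 - b.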
45535    #[simd_test(enable = "avx512f")]
45536    unsafe fn test_mm512_fnmadd_ps() {
45537        let a = _mm512_set1_ps(1.);
45538        let b = _mm512_setr_ps(
45539            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45540        );
45541        let c = _mm512_set1_ps(1.);
45542        let r = _mm512_fnmadd_ps(a, b, c);
45543        let e = _mm512_setr_ps(
45544            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
45545        );
45546        assert_eq_m512(r, e);
45547    }
45548
45549    #[simd_test(enable = "avx512f")]
45550    unsafe fn test_mm512_mask_fnmadd_ps() {
45551        let a = _mm512_set1_ps(1.);
45552        let b = _mm512_setr_ps(
45553            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45554        );
45555        let c = _mm512_set1_ps(1.);
45556        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
45557        assert_eq_m512(r, a);
45558        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
45559        let e = _mm512_setr_ps(
45560            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
45561        );
45562        assert_eq_m512(r, e);
45563    }
45564
45565    #[simd_test(enable = "avx512f")]
45566    unsafe fn test_mm512_maskz_fnmadd_ps() {
45567        let a = _mm512_set1_ps(1.);
45568        let b = _mm512_setr_ps(
45569            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45570        );
45571        let c = _mm512_set1_ps(1.);
45572        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
45573        assert_eq_m512(r, _mm512_setzero_ps());
45574        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
45575        let e = _mm512_setr_ps(
45576            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
45577        );
45578        assert_eq_m512(r, e);
45579    }
45580
45581    #[simd_test(enable = "avx512f")]
45582    unsafe fn test_mm512_mask3_fnmadd_ps() {
45583        let a = _mm512_set1_ps(1.);
45584        let b = _mm512_setr_ps(
45585            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45586        );
45587        let c = _mm512_setr_ps(
45588            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45589        );
45590        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
45591        assert_eq_m512(r, c);
45592        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
45593        let e = _mm512_setr_ps(
45594            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
45595        );
45596        assert_eq_m512(r, e);
45597    }
45598
45599    #[simd_test(enable = "avx512f,avx512vl")]
45600    unsafe fn test_mm256_mask_fnmadd_ps() {
45601        let a = _mm256_set1_ps(1.);
45602        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45603        let c = _mm256_set1_ps(1.);
45604        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
45605        assert_eq_m256(r, a);
45606        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
45607        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45608        assert_eq_m256(r, e);
45609    }
45610
45611    #[simd_test(enable = "avx512f,avx512vl")]
45612    unsafe fn test_mm256_maskz_fnmadd_ps() {
45613        let a = _mm256_set1_ps(1.);
45614        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45615        let c = _mm256_set1_ps(1.);
45616        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
45617        assert_eq_m256(r, _mm256_setzero_ps());
45618        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
45619        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45620        assert_eq_m256(r, e);
45621    }
45622
45623    #[simd_test(enable = "avx512f,avx512vl")]
45624    unsafe fn test_mm256_mask3_fnmadd_ps() {
45625        let a = _mm256_set1_ps(1.);
45626        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45627        let c = _mm256_set1_ps(1.);
45628        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
45629        assert_eq_m256(r, c);
45630        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
45631        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45632        assert_eq_m256(r, e);
45633    }
45634
45635    #[simd_test(enable = "avx512f,avx512vl")]
45636    unsafe fn test_mm_mask_fnmadd_ps() {
45637        let a = _mm_set1_ps(1.);
45638        let b = _mm_set_ps(0., 1., 2., 3.);
45639        let c = _mm_set1_ps(1.);
45640        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
45641        assert_eq_m128(r, a);
45642        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
45643        let e = _mm_set_ps(1., 0., -1., -2.);
45644        assert_eq_m128(r, e);
45645    }
45646
45647    #[simd_test(enable = "avx512f,avx512vl")]
45648    unsafe fn test_mm_maskz_fnmadd_ps() {
45649        let a = _mm_set1_ps(1.);
45650        let b = _mm_set_ps(0., 1., 2., 3.);
45651        let c = _mm_set1_ps(1.);
45652        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
45653        assert_eq_m128(r, _mm_setzero_ps());
45654        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
45655        let e = _mm_set_ps(1., 0., -1., -2.);
45656        assert_eq_m128(r, e);
45657    }
45658
45659    #[simd_test(enable = "avx512f,avx512vl")]
45660    unsafe fn test_mm_mask3_fnmadd_ps() {
45661        let a = _mm_set1_ps(1.);
45662        let b = _mm_set_ps(0., 1., 2., 3.);
45663        let c = _mm_set1_ps(1.);
45664        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
45665        assert_eq_m128(r, c);
45666        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
45667        let e = _mm_set_ps(1., 0., -1., -2.);
45668        assert_eq_m128(r, e);
45669    }
45670
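    // fnmsub computes -(a * b) - c, so with a == 1 and c == 1 each lane is -b - 1.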
45671    #[simd_test(enable = "avx512f")]
45672    unsafe fn test_mm512_fnmsub_ps() {
45673        let a = _mm512_set1_ps(1.);
45674        let b = _mm512_setr_ps(
45675            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45676        );
45677        let c = _mm512_set1_ps(1.);
45678        let r = _mm512_fnmsub_ps(a, b, c);
45679        let e = _mm512_setr_ps(
45680            -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
45681        );
45682        assert_eq_m512(r, e);
45683    }
45684
45685    #[simd_test(enable = "avx512f")]
45686    unsafe fn test_mm512_mask_fnmsub_ps() {
45687        let a = _mm512_set1_ps(1.);
45688        let b = _mm512_setr_ps(
45689            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45690        );
45691        let c = _mm512_set1_ps(1.);
45692        let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
45693        assert_eq_m512(r, a);
45694        let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
45695        let e = _mm512_setr_ps(
45696            -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
45697        );
45698        assert_eq_m512(r, e);
45699    }
45700
45701    #[simd_test(enable = "avx512f")]
45702    unsafe fn test_mm512_maskz_fnmsub_ps() {
45703        let a = _mm512_set1_ps(1.);
45704        let b = _mm512_setr_ps(
45705            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45706        );
45707        let c = _mm512_set1_ps(1.);
45708        let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
45709        assert_eq_m512(r, _mm512_setzero_ps());
45710        let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
45711        let e = _mm512_setr_ps(
45712            -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
45713        );
45714        assert_eq_m512(r, e);
45715    }
45716
45717    #[simd_test(enable = "avx512f")]
45718    unsafe fn test_mm512_mask3_fnmsub_ps() {
45719        let a = _mm512_set1_ps(1.);
45720        let b = _mm512_setr_ps(
45721            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45722        );
45723        let c = _mm512_setr_ps(
45724            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45725        );
45726        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
45727        assert_eq_m512(r, c);
45728        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
45729        let e = _mm512_setr_ps(
45730            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
45731        );
45732        assert_eq_m512(r, e);
45733    }
45734
45735    #[simd_test(enable = "avx512f,avx512vl")]
45736    unsafe fn test_mm256_mask_fnmsub_ps() {
45737        let a = _mm256_set1_ps(1.);
45738        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45739        let c = _mm256_set1_ps(1.);
45740        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
45741        assert_eq_m256(r, a);
45742        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
45743        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45744        assert_eq_m256(r, e);
45745    }
45746
45747    #[simd_test(enable = "avx512f,avx512vl")]
45748    unsafe fn test_mm256_maskz_fnmsub_ps() {
45749        let a = _mm256_set1_ps(1.);
45750        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45751        let c = _mm256_set1_ps(1.);
45752        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
45753        assert_eq_m256(r, _mm256_setzero_ps());
45754        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
45755        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45756        assert_eq_m256(r, e);
45757    }
45758
45759    #[simd_test(enable = "avx512f,avx512vl")]
45760    unsafe fn test_mm256_mask3_fnmsub_ps() {
45761        let a = _mm256_set1_ps(1.);
45762        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45763        let c = _mm256_set1_ps(1.);
45764        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
45765        assert_eq_m256(r, c);
45766        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
45767        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45768        assert_eq_m256(r, e);
45769    }
45770
45771    #[simd_test(enable = "avx512f,avx512vl")]
45772    unsafe fn test_mm_mask_fnmsub_ps() {
45773        let a = _mm_set1_ps(1.);
45774        let b = _mm_set_ps(0., 1., 2., 3.);
45775        let c = _mm_set1_ps(1.);
45776        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
45777        assert_eq_m128(r, a);
45778        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
45779        let e = _mm_set_ps(-1., -2., -3., -4.);
45780        assert_eq_m128(r, e);
45781    }
45782
45783    #[simd_test(enable = "avx512f,avx512vl")]
45784    unsafe fn test_mm_maskz_fnmsub_ps() {
45785        let a = _mm_set1_ps(1.);
45786        let b = _mm_set_ps(0., 1., 2., 3.);
45787        let c = _mm_set1_ps(1.);
45788        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
45789        assert_eq_m128(r, _mm_setzero_ps());
45790        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
45791        let e = _mm_set_ps(-1., -2., -3., -4.);
45792        assert_eq_m128(r, e);
45793    }
45794
45795    #[simd_test(enable = "avx512f,avx512vl")]
45796    unsafe fn test_mm_mask3_fnmsub_ps() {
45797        let a = _mm_set1_ps(1.);
45798        let b = _mm_set_ps(0., 1., 2., 3.);
45799        let c = _mm_set1_ps(1.);
45800        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
45801        assert_eq_m128(r, c);
45802        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
45803        let e = _mm_set_ps(-1., -2., -3., -4.);
45804        assert_eq_m128(r, e);
45805    }
45806
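    // rcp14 is an approximate reciprocal with relative error below 2^-14, hence the
    // expected 0.33333206 rather than an exact 1/3.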
45807    #[simd_test(enable = "avx512f")]
45808    unsafe fn test_mm512_rcp14_ps() {
45809        let a = _mm512_set1_ps(3.);
45810        let r = _mm512_rcp14_ps(a);
45811        let e = _mm512_set1_ps(0.33333206);
45812        assert_eq_m512(r, e);
45813    }
45814
45815    #[simd_test(enable = "avx512f")]
45816    unsafe fn test_mm512_mask_rcp14_ps() {
45817        let a = _mm512_set1_ps(3.);
45818        let r = _mm512_mask_rcp14_ps(a, 0, a);
45819        assert_eq_m512(r, a);
45820        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
45821        let e = _mm512_setr_ps(
45822            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45823            0.33333206, 0.33333206, 0.33333206, 0.33333206,
45824        );
45825        assert_eq_m512(r, e);
45826    }
45827
45828    #[simd_test(enable = "avx512f")]
45829    unsafe fn test_mm512_maskz_rcp14_ps() {
45830        let a = _mm512_set1_ps(3.);
45831        let r = _mm512_maskz_rcp14_ps(0, a);
45832        assert_eq_m512(r, _mm512_setzero_ps());
45833        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
45834        let e = _mm512_setr_ps(
45835            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45836            0.33333206, 0.33333206, 0.33333206, 0.33333206,
45837        );
45838        assert_eq_m512(r, e);
45839    }
45840
45841    #[simd_test(enable = "avx512f,avx512vl")]
45842    unsafe fn test_mm256_rcp14_ps() {
45843        let a = _mm256_set1_ps(3.);
45844        let r = _mm256_rcp14_ps(a);
45845        let e = _mm256_set1_ps(0.33333206);
45846        assert_eq_m256(r, e);
45847    }
45848
45849    #[simd_test(enable = "avx512f,avx512vl")]
45850    unsafe fn test_mm256_mask_rcp14_ps() {
45851        let a = _mm256_set1_ps(3.);
45852        let r = _mm256_mask_rcp14_ps(a, 0, a);
45853        assert_eq_m256(r, a);
45854        let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
45855        let e = _mm256_set1_ps(0.33333206);
45856        assert_eq_m256(r, e);
45857    }
45858
45859    #[simd_test(enable = "avx512f,avx512vl")]
45860    unsafe fn test_mm256_maskz_rcp14_ps() {
45861        let a = _mm256_set1_ps(3.);
45862        let r = _mm256_maskz_rcp14_ps(0, a);
45863        assert_eq_m256(r, _mm256_setzero_ps());
45864        let r = _mm256_maskz_rcp14_ps(0b11111111, a);
45865        let e = _mm256_set1_ps(0.33333206);
45866        assert_eq_m256(r, e);
45867    }
45868
45869    #[simd_test(enable = "avx512f,avx512vl")]
45870    unsafe fn test_mm_rcp14_ps() {
45871        let a = _mm_set1_ps(3.);
45872        let r = _mm_rcp14_ps(a);
45873        let e = _mm_set1_ps(0.33333206);
45874        assert_eq_m128(r, e);
45875    }
45876
45877    #[simd_test(enable = "avx512f,avx512vl")]
45878    unsafe fn test_mm_mask_rcp14_ps() {
45879        let a = _mm_set1_ps(3.);
45880        let r = _mm_mask_rcp14_ps(a, 0, a);
45881        assert_eq_m128(r, a);
45882        let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
45883        let e = _mm_set1_ps(0.33333206);
45884        assert_eq_m128(r, e);
45885    }
45886
45887    #[simd_test(enable = "avx512f,avx512vl")]
45888    unsafe fn test_mm_maskz_rcp14_ps() {
45889        let a = _mm_set1_ps(3.);
45890        let r = _mm_maskz_rcp14_ps(0, a);
45891        assert_eq_m128(r, _mm_setzero_ps());
45892        let r = _mm_maskz_rcp14_ps(0b00001111, a);
45893        let e = _mm_set1_ps(0.33333206);
45894        assert_eq_m128(r, e);
45895    }
45896
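    // rsqrt14 is an approximate reciprocal square root (relative error below 2^-14):
    // 1/sqrt(3) ~= 0.57735 comes back as 0.5773392.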
45897    #[simd_test(enable = "avx512f")]
45898    unsafe fn test_mm512_rsqrt14_ps() {
45899        let a = _mm512_set1_ps(3.);
45900        let r = _mm512_rsqrt14_ps(a);
45901        let e = _mm512_set1_ps(0.5773392);
45902        assert_eq_m512(r, e);
45903    }
45904
45905    #[simd_test(enable = "avx512f")]
45906    unsafe fn test_mm512_mask_rsqrt14_ps() {
45907        let a = _mm512_set1_ps(3.);
45908        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
45909        assert_eq_m512(r, a);
45910        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
45911        let e = _mm512_setr_ps(
45912            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45913            0.5773392, 0.5773392, 0.5773392,
45914        );
45915        assert_eq_m512(r, e);
45916    }
45917
45918    #[simd_test(enable = "avx512f")]
45919    unsafe fn test_mm512_maskz_rsqrt14_ps() {
45920        let a = _mm512_set1_ps(3.);
45921        let r = _mm512_maskz_rsqrt14_ps(0, a);
45922        assert_eq_m512(r, _mm512_setzero_ps());
45923        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
45924        let e = _mm512_setr_ps(
45925            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45926            0.5773392, 0.5773392, 0.5773392,
45927        );
45928        assert_eq_m512(r, e);
45929    }
45930
45931    #[simd_test(enable = "avx512f,avx512vl")]
45932    unsafe fn test_mm256_rsqrt14_ps() {
45933        let a = _mm256_set1_ps(3.);
45934        let r = _mm256_rsqrt14_ps(a);
45935        let e = _mm256_set1_ps(0.5773392);
45936        assert_eq_m256(r, e);
45937    }
45938
45939    #[simd_test(enable = "avx512f,avx512vl")]
45940    unsafe fn test_mm256_mask_rsqrt14_ps() {
45941        let a = _mm256_set1_ps(3.);
45942        let r = _mm256_mask_rsqrt14_ps(a, 0, a);
45943        assert_eq_m256(r, a);
45944        let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
45945        let e = _mm256_set1_ps(0.5773392);
45946        assert_eq_m256(r, e);
45947    }
45948
45949    #[simd_test(enable = "avx512f,avx512vl")]
45950    unsafe fn test_mm256_maskz_rsqrt14_ps() {
45951        let a = _mm256_set1_ps(3.);
45952        let r = _mm256_maskz_rsqrt14_ps(0, a);
45953        assert_eq_m256(r, _mm256_setzero_ps());
45954        let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
45955        let e = _mm256_set1_ps(0.5773392);
45956        assert_eq_m256(r, e);
45957    }
45958
45959    #[simd_test(enable = "avx512f,avx512vl")]
45960    unsafe fn test_mm_rsqrt14_ps() {
45961        let a = _mm_set1_ps(3.);
45962        let r = _mm_rsqrt14_ps(a);
45963        let e = _mm_set1_ps(0.5773392);
45964        assert_eq_m128(r, e);
45965    }
45966
45967    #[simd_test(enable = "avx512f,avx512vl")]
45968    unsafe fn test_mm_mask_rsqrt14_ps() {
45969        let a = _mm_set1_ps(3.);
45970        let r = _mm_mask_rsqrt14_ps(a, 0, a);
45971        assert_eq_m128(r, a);
45972        let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
45973        let e = _mm_set1_ps(0.5773392);
45974        assert_eq_m128(r, e);
45975    }
45976
45977    #[simd_test(enable = "avx512f,avx512vl")]
45978    unsafe fn test_mm_maskz_rsqrt14_ps() {
45979        let a = _mm_set1_ps(3.);
45980        let r = _mm_maskz_rsqrt14_ps(0, a);
45981        assert_eq_m128(r, _mm_setzero_ps());
45982        let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
45983        let e = _mm_set1_ps(0.5773392);
45984        assert_eq_m128(r, e);
45985    }
45986
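    // getexp extracts the exponent as a float, i.e. floor(log2(|a|)), so 3.0 yields 1.0.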
45987    #[simd_test(enable = "avx512f")]
45988    unsafe fn test_mm512_getexp_ps() {
45989        let a = _mm512_set1_ps(3.);
45990        let r = _mm512_getexp_ps(a);
45991        let e = _mm512_set1_ps(1.);
45992        assert_eq_m512(r, e);
45993    }
45994
45995    #[simd_test(enable = "avx512f")]
45996    unsafe fn test_mm512_mask_getexp_ps() {
45997        let a = _mm512_set1_ps(3.);
45998        let r = _mm512_mask_getexp_ps(a, 0, a);
45999        assert_eq_m512(r, a);
46000        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
46001        let e = _mm512_setr_ps(
46002            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
46003        );
46004        assert_eq_m512(r, e);
46005    }
46006
46007    #[simd_test(enable = "avx512f")]
46008    unsafe fn test_mm512_maskz_getexp_ps() {
46009        let a = _mm512_set1_ps(3.);
46010        let r = _mm512_maskz_getexp_ps(0, a);
46011        assert_eq_m512(r, _mm512_setzero_ps());
46012        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
46013        let e = _mm512_setr_ps(
46014            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46015        );
46016        assert_eq_m512(r, e);
46017    }
46018
46019    #[simd_test(enable = "avx512f,avx512vl")]
46020    unsafe fn test_mm256_getexp_ps() {
46021        let a = _mm256_set1_ps(3.);
46022        let r = _mm256_getexp_ps(a);
46023        let e = _mm256_set1_ps(1.);
46024        assert_eq_m256(r, e);
46025    }
46026
46027    #[simd_test(enable = "avx512f,avx512vl")]
46028    unsafe fn test_mm256_mask_getexp_ps() {
46029        let a = _mm256_set1_ps(3.);
46030        let r = _mm256_mask_getexp_ps(a, 0, a);
46031        assert_eq_m256(r, a);
46032        let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
46033        let e = _mm256_set1_ps(1.);
46034        assert_eq_m256(r, e);
46035    }
46036
46037    #[simd_test(enable = "avx512f,avx512vl")]
46038    unsafe fn test_mm256_maskz_getexp_ps() {
46039        let a = _mm256_set1_ps(3.);
46040        let r = _mm256_maskz_getexp_ps(0, a);
46041        assert_eq_m256(r, _mm256_setzero_ps());
46042        let r = _mm256_maskz_getexp_ps(0b11111111, a);
46043        let e = _mm256_set1_ps(1.);
46044        assert_eq_m256(r, e);
46045    }
46046
46047    #[simd_test(enable = "avx512f,avx512vl")]
46048    unsafe fn test_mm_getexp_ps() {
46049        let a = _mm_set1_ps(3.);
46050        let r = _mm_getexp_ps(a);
46051        let e = _mm_set1_ps(1.);
46052        assert_eq_m128(r, e);
46053    }
46054
46055    #[simd_test(enable = "avx512f,avx512vl")]
46056    unsafe fn test_mm_mask_getexp_ps() {
46057        let a = _mm_set1_ps(3.);
46058        let r = _mm_mask_getexp_ps(a, 0, a);
46059        assert_eq_m128(r, a);
46060        let r = _mm_mask_getexp_ps(a, 0b00001111, a);
46061        let e = _mm_set1_ps(1.);
46062        assert_eq_m128(r, e);
46063    }
46064
46065    #[simd_test(enable = "avx512f,avx512vl")]
46066    unsafe fn test_mm_maskz_getexp_ps() {
46067        let a = _mm_set1_ps(3.);
46068        let r = _mm_maskz_getexp_ps(0, a);
46069        assert_eq_m128(r, _mm_setzero_ps());
46070        let r = _mm_maskz_getexp_ps(0b00001111, a);
46071        let e = _mm_set1_ps(1.);
46072        assert_eq_m128(r, e);
46073    }
46074
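    // roundscale with imm8 == 0 keeps zero fraction bits and rounds to nearest,
    // so 1.1 becomes 1.0.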
46075    #[simd_test(enable = "avx512f")]
46076    unsafe fn test_mm512_roundscale_ps() {
46077        let a = _mm512_set1_ps(1.1);
46078        let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
46079        let e = _mm512_set1_ps(1.0);
46080        assert_eq_m512(r, e);
46081    }
46082
46083    #[simd_test(enable = "avx512f")]
46084    unsafe fn test_mm512_mask_roundscale_ps() {
46085        let a = _mm512_set1_ps(1.1);
46086        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46087        let e = _mm512_set1_ps(1.1);
46088        assert_eq_m512(r, e);
46089        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
46090        let e = _mm512_set1_ps(1.0);
46091        assert_eq_m512(r, e);
46092    }
46093
46094    #[simd_test(enable = "avx512f")]
46095    unsafe fn test_mm512_maskz_roundscale_ps() {
46096        let a = _mm512_set1_ps(1.1);
46097        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46098        assert_eq_m512(r, _mm512_setzero_ps());
46099        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
46100        let e = _mm512_set1_ps(1.0);
46101        assert_eq_m512(r, e);
46102    }
46103
46104    #[simd_test(enable = "avx512f,avx512vl")]
46105    unsafe fn test_mm256_roundscale_ps() {
46106        let a = _mm256_set1_ps(1.1);
46107        let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
46108        let e = _mm256_set1_ps(1.0);
46109        assert_eq_m256(r, e);
46110    }
46111
46112    #[simd_test(enable = "avx512f,avx512vl")]
46113    unsafe fn test_mm256_mask_roundscale_ps() {
46114        let a = _mm256_set1_ps(1.1);
46115        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46116        let e = _mm256_set1_ps(1.1);
46117        assert_eq_m256(r, e);
46118        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
46119        let e = _mm256_set1_ps(1.0);
46120        assert_eq_m256(r, e);
46121    }
46122
46123    #[simd_test(enable = "avx512f,avx512vl")]
46124    unsafe fn test_mm256_maskz_roundscale_ps() {
46125        let a = _mm256_set1_ps(1.1);
46126        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46127        assert_eq_m256(r, _mm256_setzero_ps());
46128        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
46129        let e = _mm256_set1_ps(1.0);
46130        assert_eq_m256(r, e);
46131    }
46132
46133    #[simd_test(enable = "avx512f,avx512vl")]
46134    unsafe fn test_mm_roundscale_ps() {
46135        let a = _mm_set1_ps(1.1);
46136        let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
46137        let e = _mm_set1_ps(1.0);
46138        assert_eq_m128(r, e);
46139    }
46140
46141    #[simd_test(enable = "avx512f,avx512vl")]
46142    unsafe fn test_mm_mask_roundscale_ps() {
46143        let a = _mm_set1_ps(1.1);
46144        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46145        let e = _mm_set1_ps(1.1);
46146        assert_eq_m128(r, e);
46147        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
46148        let e = _mm_set1_ps(1.0);
46149        assert_eq_m128(r, e);
46150    }
46151
46152    #[simd_test(enable = "avx512f,avx512vl")]
46153    unsafe fn test_mm_maskz_roundscale_ps() {
46154        let a = _mm_set1_ps(1.1);
46155        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46156        assert_eq_m128(r, _mm_setzero_ps());
46157        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
46158        let e = _mm_set1_ps(1.0);
46159        assert_eq_m128(r, e);
46160    }
46161
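    // scalef computes a * 2^floor(b), so 1.0 scaled by 3.0 gives 8.0.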
46162    #[simd_test(enable = "avx512f")]
46163    unsafe fn test_mm512_scalef_ps() {
46164        let a = _mm512_set1_ps(1.);
46165        let b = _mm512_set1_ps(3.);
46166        let r = _mm512_scalef_ps(a, b);
46167        let e = _mm512_set1_ps(8.);
46168        assert_eq_m512(r, e);
46169    }
46170
46171    #[simd_test(enable = "avx512f")]
46172    unsafe fn test_mm512_mask_scalef_ps() {
46173        let a = _mm512_set1_ps(1.);
46174        let b = _mm512_set1_ps(3.);
46175        let r = _mm512_mask_scalef_ps(a, 0, a, b);
46176        assert_eq_m512(r, a);
46177        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
46178        let e = _mm512_set_ps(
46179            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46180        );
46181        assert_eq_m512(r, e);
46182    }
46183
46184    #[simd_test(enable = "avx512f")]
46185    unsafe fn test_mm512_maskz_scalef_ps() {
46186        let a = _mm512_set1_ps(1.);
46187        let b = _mm512_set1_ps(3.);
46188        let r = _mm512_maskz_scalef_ps(0, a, b);
46189        assert_eq_m512(r, _mm512_setzero_ps());
46190        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
46191        let e = _mm512_set_ps(
46192            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46193        );
46194        assert_eq_m512(r, e);
46195    }
46196
46197    #[simd_test(enable = "avx512f,avx512vl")]
46198    unsafe fn test_mm256_scalef_ps() {
46199        let a = _mm256_set1_ps(1.);
46200        let b = _mm256_set1_ps(3.);
46201        let r = _mm256_scalef_ps(a, b);
46202        let e = _mm256_set1_ps(8.);
46203        assert_eq_m256(r, e);
46204    }
46205
46206    #[simd_test(enable = "avx512f,avx512vl")]
46207    unsafe fn test_mm256_mask_scalef_ps() {
46208        let a = _mm256_set1_ps(1.);
46209        let b = _mm256_set1_ps(3.);
46210        let r = _mm256_mask_scalef_ps(a, 0, a, b);
46211        assert_eq_m256(r, a);
46212        let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
46213        let e = _mm256_set1_ps(8.);
46214        assert_eq_m256(r, e);
46215    }
46216
46217    #[simd_test(enable = "avx512f,avx512vl")]
46218    unsafe fn test_mm256_maskz_scalef_ps() {
46219        let a = _mm256_set1_ps(1.);
46220        let b = _mm256_set1_ps(3.);
46221        let r = _mm256_maskz_scalef_ps(0, a, b);
46222        assert_eq_m256(r, _mm256_setzero_ps());
46223        let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
46224        let e = _mm256_set1_ps(8.);
46225        assert_eq_m256(r, e);
46226    }
46227
46228    #[simd_test(enable = "avx512f,avx512vl")]
46229    unsafe fn test_mm_scalef_ps() {
46230        let a = _mm_set1_ps(1.);
46231        let b = _mm_set1_ps(3.);
46232        let r = _mm_scalef_ps(a, b);
46233        let e = _mm_set1_ps(8.);
46234        assert_eq_m128(r, e);
46235    }
46236
46237    #[simd_test(enable = "avx512f,avx512vl")]
46238    unsafe fn test_mm_mask_scalef_ps() {
46239        let a = _mm_set1_ps(1.);
46240        let b = _mm_set1_ps(3.);
46241        let r = _mm_mask_scalef_ps(a, 0, a, b);
46242        assert_eq_m128(r, a);
46243        let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
46244        let e = _mm_set1_ps(8.);
46245        assert_eq_m128(r, e);
46246    }
46247
46248    #[simd_test(enable = "avx512f,avx512vl")]
46249    unsafe fn test_mm_maskz_scalef_ps() {
46250        let a = _mm_set1_ps(1.);
46251        let b = _mm_set1_ps(3.);
46252        let r = _mm_maskz_scalef_ps(0, a, b);
46253        assert_eq_m128(r, _mm_setzero_ps());
46254        let r = _mm_maskz_scalef_ps(0b00001111, a, b);
46255        let e = _mm_set1_ps(8.);
46256        assert_eq_m128(r, e);
46257    }
46258
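    // fixupimm patches special-case values using the per-lane token table in `c`;
    // for these inputs every selected lane comes out as +0.0.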
46259    #[simd_test(enable = "avx512f")]
46260    unsafe fn test_mm512_fixupimm_ps() {
46261        let a = _mm512_set1_ps(f32::NAN);
46262        let b = _mm512_set1_ps(f32::MAX);
46263        let c = _mm512_set1_epi32(i32::MAX);
46265        let r = _mm512_fixupimm_ps::<5>(a, b, c);
46266        let e = _mm512_set1_ps(0.0);
46267        assert_eq_m512(r, e);
46268    }
46269
46270    #[simd_test(enable = "avx512f")]
46271    unsafe fn test_mm512_mask_fixupimm_ps() {
46272        #[rustfmt::skip]
46273        let a = _mm512_set_ps(
46274            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46275            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46276            1., 1., 1., 1.,
46277            1., 1., 1., 1.,
46278        );
46279        let b = _mm512_set1_ps(f32::MAX);
46280        let c = _mm512_set1_epi32(i32::MAX);
46281        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
46282        let e = _mm512_set_ps(
46283            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46284        );
46285        assert_eq_m512(r, e);
46286    }
46287
46288    #[simd_test(enable = "avx512f")]
46289    unsafe fn test_mm512_maskz_fixupimm_ps() {
46290        #[rustfmt::skip]
46291        let a = _mm512_set_ps(
46292            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46293            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46294            1., 1., 1., 1.,
46295            1., 1., 1., 1.,
46296        );
46297        let b = _mm512_set1_ps(f32::MAX);
46298        let c = _mm512_set1_epi32(i32::MAX);
46299        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
46300        let e = _mm512_set_ps(
46301            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
46302        );
46303        assert_eq_m512(r, e);
46304    }
46305
46306    #[simd_test(enable = "avx512f,avx512vl")]
46307    unsafe fn test_mm256_fixupimm_ps() {
46308        let a = _mm256_set1_ps(f32::NAN);
46309        let b = _mm256_set1_ps(f32::MAX);
46310        let c = _mm256_set1_epi32(i32::MAX);
46311        let r = _mm256_fixupimm_ps::<5>(a, b, c);
46312        let e = _mm256_set1_ps(0.0);
46313        assert_eq_m256(r, e);
46314    }
46315
46316    #[simd_test(enable = "avx512f,avx512vl")]
46317    unsafe fn test_mm256_mask_fixupimm_ps() {
46318        let a = _mm256_set1_ps(f32::NAN);
46319        let b = _mm256_set1_ps(f32::MAX);
46320        let c = _mm256_set1_epi32(i32::MAX);
46321        let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
46322        let e = _mm256_set1_ps(0.0);
46323        assert_eq_m256(r, e);
46324    }
46325
46326    #[simd_test(enable = "avx512f,avx512vl")]
46327    unsafe fn test_mm256_maskz_fixupimm_ps() {
46328        let a = _mm256_set1_ps(f32::NAN);
46329        let b = _mm256_set1_ps(f32::MAX);
46330        let c = _mm256_set1_epi32(i32::MAX);
46331        let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
46332        let e = _mm256_set1_ps(0.0);
46333        assert_eq_m256(r, e);
46334    }
46335
46336    #[simd_test(enable = "avx512f,avx512vl")]
46337    unsafe fn test_mm_fixupimm_ps() {
46338        let a = _mm_set1_ps(f32::NAN);
46339        let b = _mm_set1_ps(f32::MAX);
46340        let c = _mm_set1_epi32(i32::MAX);
46341        let r = _mm_fixupimm_ps::<5>(a, b, c);
46342        let e = _mm_set1_ps(0.0);
46343        assert_eq_m128(r, e);
46344    }
46345
46346    #[simd_test(enable = "avx512f,avx512vl")]
46347    unsafe fn test_mm_mask_fixupimm_ps() {
46348        let a = _mm_set1_ps(f32::NAN);
46349        let b = _mm_set1_ps(f32::MAX);
46350        let c = _mm_set1_epi32(i32::MAX);
46351        let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
46352        let e = _mm_set1_ps(0.0);
46353        assert_eq_m128(r, e);
46354    }
46355
46356    #[simd_test(enable = "avx512f,avx512vl")]
46357    unsafe fn test_mm_maskz_fixupimm_ps() {
46358        let a = _mm_set1_ps(f32::NAN);
46359        let b = _mm_set1_ps(f32::MAX);
46360        let c = _mm_set1_epi32(i32::MAX);
46361        let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
46362        let e = _mm_set1_ps(0.0);
46363        assert_eq_m128(r, e);
46364    }
46365
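    // ternarylogic applies the 3-input truth table encoded in imm8 bitwise to a, b
    // and c; with these disjoint single-bit inputs and imm8 == 8 every lane is 0.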
46366    #[simd_test(enable = "avx512f")]
46367    unsafe fn test_mm512_ternarylogic_epi32() {
46368        let a = _mm512_set1_epi32(1 << 2);
46369        let b = _mm512_set1_epi32(1 << 1);
46370        let c = _mm512_set1_epi32(1 << 0);
46371        let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
46372        let e = _mm512_set1_epi32(0);
46373        assert_eq_m512i(r, e);
46374    }
46375
46376    #[simd_test(enable = "avx512f")]
46377    unsafe fn test_mm512_mask_ternarylogic_epi32() {
46378        let src = _mm512_set1_epi32(1 << 2);
46379        let a = _mm512_set1_epi32(1 << 1);
46380        let b = _mm512_set1_epi32(1 << 0);
46381        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46382        assert_eq_m512i(r, src);
46383        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
46384        let e = _mm512_set1_epi32(0);
46385        assert_eq_m512i(r, e);
46386    }
46387
46388    #[simd_test(enable = "avx512f")]
46389    unsafe fn test_mm512_maskz_ternarylogic_epi32() {
46390        let a = _mm512_set1_epi32(1 << 2);
46391        let b = _mm512_set1_epi32(1 << 1);
46392        let c = _mm512_set1_epi32(1 << 0);
46393        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46394        assert_eq_m512i(r, _mm512_setzero_si512());
46395        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
46396        let e = _mm512_set1_epi32(0);
46397        assert_eq_m512i(r, e);
46398    }
46399
46400    #[simd_test(enable = "avx512f,avx512vl")]
46401    unsafe fn test_mm256_ternarylogic_epi32() {
46402        let a = _mm256_set1_epi32(1 << 2);
46403        let b = _mm256_set1_epi32(1 << 1);
46404        let c = _mm256_set1_epi32(1 << 0);
46405        let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
46406        let e = _mm256_set1_epi32(0);
46407        assert_eq_m256i(r, e);
46408    }
46409
46410    #[simd_test(enable = "avx512f,avx512vl")]
46411    unsafe fn test_mm256_mask_ternarylogic_epi32() {
46412        let src = _mm256_set1_epi32(1 << 2);
46413        let a = _mm256_set1_epi32(1 << 1);
46414        let b = _mm256_set1_epi32(1 << 0);
46415        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46416        assert_eq_m256i(r, src);
46417        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
46418        let e = _mm256_set1_epi32(0);
46419        assert_eq_m256i(r, e);
46420    }
46421
46422    #[simd_test(enable = "avx512f,avx512vl")]
46423    unsafe fn test_mm256_maskz_ternarylogic_epi32() {
46424        let a = _mm256_set1_epi32(1 << 2);
46425        let b = _mm256_set1_epi32(1 << 1);
46426        let c = _mm256_set1_epi32(1 << 0);
46427        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46428        assert_eq_m256i(r, _mm256_setzero_si256());
46429        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
46430        let e = _mm256_set1_epi32(0);
46431        assert_eq_m256i(r, e);
46432    }
46433
46434    #[simd_test(enable = "avx512f,avx512vl")]
46435    unsafe fn test_mm_ternarylogic_epi32() {
46436        let a = _mm_set1_epi32(1 << 2);
46437        let b = _mm_set1_epi32(1 << 1);
46438        let c = _mm_set1_epi32(1 << 0);
46439        let r = _mm_ternarylogic_epi32::<8>(a, b, c);
46440        let e = _mm_set1_epi32(0);
46441        assert_eq_m128i(r, e);
46442    }
46443
46444    #[simd_test(enable = "avx512f,avx512vl")]
46445    unsafe fn test_mm_mask_ternarylogic_epi32() {
46446        let src = _mm_set1_epi32(1 << 2);
46447        let a = _mm_set1_epi32(1 << 1);
46448        let b = _mm_set1_epi32(1 << 0);
46449        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46450        assert_eq_m128i(r, src);
46451        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
46452        let e = _mm_set1_epi32(0);
46453        assert_eq_m128i(r, e);
46454    }
46455
46456    #[simd_test(enable = "avx512f,avx512vl")]
46457    unsafe fn test_mm_maskz_ternarylogic_epi32() {
46458        let a = _mm_set1_epi32(1 << 2);
46459        let b = _mm_set1_epi32(1 << 1);
46460        let c = _mm_set1_epi32(1 << 0);
46461        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46462        assert_eq_m128i(r, _mm_setzero_si128());
46463        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
46464        let e = _mm_set1_epi32(0);
46465        assert_eq_m128i(r, e);
46466    }
46467
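    // getmant returns the mantissa normalized to the requested interval:
    // 10.0 == 1.25 * 2^3, so both the [0.75, 1.5) and [1, 2) normalizations yield 1.25.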
46468    #[simd_test(enable = "avx512f")]
46469    unsafe fn test_mm512_getmant_ps() {
46470        let a = _mm512_set1_ps(10.);
46471        let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46472        let e = _mm512_set1_ps(1.25);
46473        assert_eq_m512(r, e);
46474    }
46475
46476    #[simd_test(enable = "avx512f")]
46477    unsafe fn test_mm512_mask_getmant_ps() {
46478        let a = _mm512_set1_ps(10.);
46479        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46480        assert_eq_m512(r, a);
46481        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
46482            a,
46483            0b11111111_00000000,
46484            a,
46485        );
46486        let e = _mm512_setr_ps(
46487            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46488        );
46489        assert_eq_m512(r, e);
46490    }
46491
46492    #[simd_test(enable = "avx512f")]
46493    unsafe fn test_mm512_maskz_getmant_ps() {
46494        let a = _mm512_set1_ps(10.);
46495        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46496        assert_eq_m512(r, _mm512_setzero_ps());
46497        let r =
46498            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
46499        let e = _mm512_setr_ps(
46500            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46501        );
46502        assert_eq_m512(r, e);
46503    }
46504
46505    #[simd_test(enable = "avx512f,avx512vl")]
46506    unsafe fn test_mm256_getmant_ps() {
46507        let a = _mm256_set1_ps(10.);
46508        let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46509        let e = _mm256_set1_ps(1.25);
46510        assert_eq_m256(r, e);
46511    }
46512
46513    #[simd_test(enable = "avx512f,avx512vl")]
46514    unsafe fn test_mm256_mask_getmant_ps() {
46515        let a = _mm256_set1_ps(10.);
46516        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46517        assert_eq_m256(r, a);
46518        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
46519        let e = _mm256_set1_ps(1.25);
46520        assert_eq_m256(r, e);
46521    }
46522
46523    #[simd_test(enable = "avx512f,avx512vl")]
46524    unsafe fn test_mm256_maskz_getmant_ps() {
46525        let a = _mm256_set1_ps(10.);
46526        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46527        assert_eq_m256(r, _mm256_setzero_ps());
46528        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
46529        let e = _mm256_set1_ps(1.25);
46530        assert_eq_m256(r, e);
46531    }
46532
46533    #[simd_test(enable = "avx512f,avx512vl")]
46534    unsafe fn test_mm_getmant_ps() {
46535        let a = _mm_set1_ps(10.);
46536        let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46537        let e = _mm_set1_ps(1.25);
46538        assert_eq_m128(r, e);
46539    }
46540
46541    #[simd_test(enable = "avx512f,avx512vl")]
46542    unsafe fn test_mm_mask_getmant_ps() {
46543        let a = _mm_set1_ps(10.);
46544        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46545        assert_eq_m128(r, a);
46546        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
46547        let e = _mm_set1_ps(1.25);
46548        assert_eq_m128(r, e);
46549    }
46550
46551    #[simd_test(enable = "avx512f,avx512vl")]
46552    unsafe fn test_mm_maskz_getmant_ps() {
46553        let a = _mm_set1_ps(10.);
46554        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46555        assert_eq_m128(r, _mm_setzero_ps());
46556        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
46557        let e = _mm_set1_ps(1.25);
46558        assert_eq_m128(r, e);
46559    }
46560
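    // For the *_round_ps tests the rounding mode is only visible where the exact
    // result is not representable: 0.00000007 - 1.0 rounds to -0.99999994 to
    // nearest but to -0.9999999 toward zero.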
46561    #[simd_test(enable = "avx512f")]
46562    unsafe fn test_mm512_add_round_ps() {
46563        let a = _mm512_setr_ps(
46564            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46565        );
46566        let b = _mm512_set1_ps(-1.);
46567        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46568        #[rustfmt::skip]
46569        let e = _mm512_setr_ps(
46570            -1., 0.5, 1., 2.5,
46571            3., 4.5, 5., 6.5,
46572            7., 8.5, 9., 10.5,
46573            11., 12.5, 13., -0.99999994,
46574        );
46575        assert_eq_m512(r, e);
46576        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46577        let e = _mm512_setr_ps(
46578            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46579        );
46580        assert_eq_m512(r, e);
46581    }
46582
46583    #[simd_test(enable = "avx512f")]
46584    unsafe fn test_mm512_mask_add_round_ps() {
46585        let a = _mm512_setr_ps(
46586            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46587        );
46588        let b = _mm512_set1_ps(-1.);
46589        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
46590        assert_eq_m512(r, a);
46591        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46592            a,
46593            0b11111111_00000000,
46594            a,
46595            b,
46596        );
46597        #[rustfmt::skip]
46598        let e = _mm512_setr_ps(
46599            0., 1.5, 2., 3.5,
46600            4., 5.5, 6., 7.5,
46601            7., 8.5, 9., 10.5,
46602            11., 12.5, 13., -0.99999994,
46603        );
46604        assert_eq_m512(r, e);
46605    }
46606
46607    #[simd_test(enable = "avx512f")]
46608    unsafe fn test_mm512_maskz_add_round_ps() {
46609        let a = _mm512_setr_ps(
46610            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46611        );
46612        let b = _mm512_set1_ps(-1.);
46613        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
46614        assert_eq_m512(r, _mm512_setzero_ps());
46615        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46616            0b11111111_00000000,
46617            a,
46618            b,
46619        );
46620        #[rustfmt::skip]
46621        let e = _mm512_setr_ps(
46622            0., 0., 0., 0.,
46623            0., 0., 0., 0.,
46624            7., 8.5, 9., 10.5,
46625            11., 12.5, 13., -0.99999994,
46626        );
46627        assert_eq_m512(r, e);
46628    }
46629
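    // Same last-lane behaviour as the add_round tests: subtracting 1.0 from
    // 0.00000007 differs between round-to-nearest and round-toward-zero.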
46630    #[simd_test(enable = "avx512f")]
46631    unsafe fn test_mm512_sub_round_ps() {
46632        let a = _mm512_setr_ps(
46633            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46634        );
46635        let b = _mm512_set1_ps(1.);
46636        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46637        #[rustfmt::skip]
46638        let e = _mm512_setr_ps(
46639            -1., 0.5, 1., 2.5,
46640            3., 4.5, 5., 6.5,
46641            7., 8.5, 9., 10.5,
46642            11., 12.5, 13., -0.99999994,
46643        );
46644        assert_eq_m512(r, e);
46645        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46646        let e = _mm512_setr_ps(
46647            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46648        );
46649        assert_eq_m512(r, e);
46650    }
46651
46652    #[simd_test(enable = "avx512f")]
46653    unsafe fn test_mm512_mask_sub_round_ps() {
46654        let a = _mm512_setr_ps(
46655            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46656        );
46657        let b = _mm512_set1_ps(1.);
46658        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46659            a, 0, a, b,
46660        );
46661        assert_eq_m512(r, a);
46662        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46663            a,
46664            0b11111111_00000000,
46665            a,
46666            b,
46667        );
46668        #[rustfmt::skip]
46669        let e = _mm512_setr_ps(
46670            0., 1.5, 2., 3.5,
46671            4., 5.5, 6., 7.5,
46672            7., 8.5, 9., 10.5,
46673            11., 12.5, 13., -0.99999994,
46674        );
46675        assert_eq_m512(r, e);
46676    }
46677
46678    #[simd_test(enable = "avx512f")]
46679    unsafe fn test_mm512_maskz_sub_round_ps() {
46680        let a = _mm512_setr_ps(
46681            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46682        );
46683        let b = _mm512_set1_ps(1.);
46684        let r =
46685            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46686        assert_eq_m512(r, _mm512_setzero_ps());
46687        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46688            0b11111111_00000000,
46689            a,
46690            b,
46691        );
46692        #[rustfmt::skip]
46693        let e = _mm512_setr_ps(
46694            0., 0., 0., 0.,
46695            0., 0., 0., 0.,
46696            7., 8.5, 9., 10.5,
46697            11., 12.5, 13., -0.99999994,
46698        );
46699        assert_eq_m512(r, e);
46700    }
46701
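    // Multiplying by 0.1 (not exactly representable in f32) leaves several lanes
    // between adjacent f32 values, so round-to-nearest and round-toward-zero disagree
    // (e.g. 1.5 * 0.1 -> 0.15 vs 0.14999999).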
46702    #[simd_test(enable = "avx512f")]
46703    unsafe fn test_mm512_mul_round_ps() {
46704        #[rustfmt::skip]
46705        let a = _mm512_setr_ps(
46706            0., 1.5, 2., 3.5,
46707            4., 5.5, 6., 7.5,
46708            8., 9.5, 10., 11.5,
46709            12., 13.5, 14., 0.00000000000000000000007,
46710        );
46711        let b = _mm512_set1_ps(0.1);
46712        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46713        #[rustfmt::skip]
46714        let e = _mm512_setr_ps(
46715            0., 0.15, 0.2, 0.35,
46716            0.4, 0.55, 0.6, 0.75,
46717            0.8, 0.95, 1.0, 1.15,
46718            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46719        );
46720        assert_eq_m512(r, e);
46721        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46722        #[rustfmt::skip]
46723        let e = _mm512_setr_ps(
46724            0., 0.14999999, 0.2, 0.35,
46725            0.4, 0.54999995, 0.59999996, 0.75,
46726            0.8, 0.95, 1.0, 1.15,
46727            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
46728        );
46729        assert_eq_m512(r, e);
46730    }
46731
46732    #[simd_test(enable = "avx512f")]
46733    unsafe fn test_mm512_mask_mul_round_ps() {
46734        #[rustfmt::skip]
46735        let a = _mm512_setr_ps(
46736            0., 1.5, 2., 3.5,
46737            4., 5.5, 6., 7.5,
46738            8., 9.5, 10., 11.5,
46739            12., 13.5, 14., 0.00000000000000000000007,
46740        );
46741        let b = _mm512_set1_ps(0.1);
46742        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46743            a, 0, a, b,
46744        );
46745        assert_eq_m512(r, a);
46746        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46747            a,
46748            0b11111111_00000000,
46749            a,
46750            b,
46751        );
46752        #[rustfmt::skip]
46753        let e = _mm512_setr_ps(
46754            0., 1.5, 2., 3.5,
46755            4., 5.5, 6., 7.5,
46756            0.8, 0.95, 1.0, 1.15,
46757            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46758        );
46759        assert_eq_m512(r, e);
46760    }
46761
46762    #[simd_test(enable = "avx512f")]
46763    unsafe fn test_mm512_maskz_mul_round_ps() {
46764        #[rustfmt::skip]
46765        let a = _mm512_setr_ps(
46766            0., 1.5, 2., 3.5,
46767            4., 5.5, 6., 7.5,
46768            8., 9.5, 10., 11.5,
46769            12., 13.5, 14., 0.00000000000000000000007,
46770        );
46771        let b = _mm512_set1_ps(0.1);
46772        let r =
46773            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46774        assert_eq_m512(r, _mm512_setzero_ps());
46775        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46776            0b11111111_00000000,
46777            a,
46778            b,
46779        );
46780        #[rustfmt::skip]
46781        let e = _mm512_setr_ps(
46782            0., 0., 0., 0.,
46783            0., 0., 0., 0.,
46784            0.8, 0.95, 1.0, 1.15,
46785            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46786        );
46787        assert_eq_m512(r, e);
46788    }
46789
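    // 1.0 / 3.0 is inexact: round-to-nearest gives 0.33333334, round-toward-zero
    // truncates to 0.3333333.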
46790    #[simd_test(enable = "avx512f")]
46791    unsafe fn test_mm512_div_round_ps() {
46792        let a = _mm512_set1_ps(1.);
46793        let b = _mm512_set1_ps(3.);
46794        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46795        let e = _mm512_set1_ps(0.33333334);
46796        assert_eq_m512(r, e);
46797        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46798        let e = _mm512_set1_ps(0.3333333);
46799        assert_eq_m512(r, e);
46800    }
46801
46802    #[simd_test(enable = "avx512f")]
46803    unsafe fn test_mm512_mask_div_round_ps() {
46804        let a = _mm512_set1_ps(1.);
46805        let b = _mm512_set1_ps(3.);
46806        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46807            a, 0, a, b,
46808        );
46809        assert_eq_m512(r, a);
46810        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46811            a,
46812            0b11111111_00000000,
46813            a,
46814            b,
46815        );
46816        let e = _mm512_setr_ps(
46817            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46818            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46819        );
46820        assert_eq_m512(r, e);
46821    }
46822
46823    #[simd_test(enable = "avx512f")]
46824    unsafe fn test_mm512_maskz_div_round_ps() {
46825        let a = _mm512_set1_ps(1.);
46826        let b = _mm512_set1_ps(3.);
46827        let r =
46828            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46829        assert_eq_m512(r, _mm512_setzero_ps());
46830        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46831            0b11111111_00000000,
46832            a,
46833            b,
46834        );
46835        let e = _mm512_setr_ps(
46836            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46837            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46838        );
46839        assert_eq_m512(r, e);
46840    }
46841
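    // sqrt(3.0) is inexact: round-to-nearest gives 1.7320508, rounding toward +inf
    // gives the next representable value, 1.7320509.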
46842    #[simd_test(enable = "avx512f")]
46843    unsafe fn test_mm512_sqrt_round_ps() {
46844        let a = _mm512_set1_ps(3.);
46845        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46846        let e = _mm512_set1_ps(1.7320508);
46847        assert_eq_m512(r, e);
46848        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
46849        let e = _mm512_set1_ps(1.7320509);
46850        assert_eq_m512(r, e);
46851    }
46852
46853    #[simd_test(enable = "avx512f")]
46854    unsafe fn test_mm512_mask_sqrt_round_ps() {
46855        let a = _mm512_set1_ps(3.);
46856        let r =
46857            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
46858        assert_eq_m512(r, a);
46859        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46860            a,
46861            0b11111111_00000000,
46862            a,
46863        );
46864        let e = _mm512_setr_ps(
46865            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46866            1.7320508, 1.7320508, 1.7320508,
46867        );
46868        assert_eq_m512(r, e);
46869    }
46870
46871    #[simd_test(enable = "avx512f")]
46872    unsafe fn test_mm512_maskz_sqrt_round_ps() {
46873        let a = _mm512_set1_ps(3.);
46874        let r =
46875            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
46876        assert_eq_m512(r, _mm512_setzero_ps());
46877        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46878            0b11111111_00000000,
46879            a,
46880        );
46881        let e = _mm512_setr_ps(
46882            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46883            1.7320508, 1.7320508, 1.7320508,
46884        );
46885        assert_eq_m512(r, e);
46886    }
46887
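    // The FMA tests rely on the single rounding of a fused multiply-add:
    // 0.00000007 * 1.0 + (-1.0) keeps the tiny addend and rounds once, to
    // -0.99999994 (nearest) or -0.9999999 (toward zero). The masked variants differ
    // only in where inactive lanes come from: `mask` copies them from the first
    // operand, `maskz` zeroes them, and `mask3` copies them from `c`.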
46888    #[simd_test(enable = "avx512f")]
46889    unsafe fn test_mm512_fmadd_round_ps() {
46890        let a = _mm512_set1_ps(0.00000007);
46891        let b = _mm512_set1_ps(1.);
46892        let c = _mm512_set1_ps(-1.);
46893        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46894        let e = _mm512_set1_ps(-0.99999994);
46895        assert_eq_m512(r, e);
46896        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46897        let e = _mm512_set1_ps(-0.9999999);
46898        assert_eq_m512(r, e);
46899    }
46900
46901    #[simd_test(enable = "avx512f")]
46902    unsafe fn test_mm512_mask_fmadd_round_ps() {
46903        let a = _mm512_set1_ps(0.00000007);
46904        let b = _mm512_set1_ps(1.);
46905        let c = _mm512_set1_ps(-1.);
46906        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46907            a, 0, b, c,
46908        );
46909        assert_eq_m512(r, a);
46910        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46911            a,
46912            0b00000000_11111111,
46913            b,
46914            c,
46915        );
46916        #[rustfmt::skip]
46917        let e = _mm512_setr_ps(
46918            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46919            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46920            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46921            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46922        );
46923        assert_eq_m512(r, e);
46924    }
46925
46926    #[simd_test(enable = "avx512f")]
46927    unsafe fn test_mm512_maskz_fmadd_round_ps() {
46928        let a = _mm512_set1_ps(0.00000007);
46929        let b = _mm512_set1_ps(1.);
46930        let c = _mm512_set1_ps(-1.);
46931        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46932            0, a, b, c,
46933        );
46934        assert_eq_m512(r, _mm512_setzero_ps());
46935        #[rustfmt::skip]
46936        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46937            0b00000000_11111111,
46938            a,
46939            b,
46940            c,
46941        );
46942        #[rustfmt::skip]
46943        let e = _mm512_setr_ps(
46944            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46945            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46946            0., 0., 0., 0.,
46947            0., 0., 0., 0.,
46948        );
46949        assert_eq_m512(r, e);
46950    }
46951
46952    #[simd_test(enable = "avx512f")]
46953    unsafe fn test_mm512_mask3_fmadd_round_ps() {
46954        let a = _mm512_set1_ps(0.00000007);
46955        let b = _mm512_set1_ps(1.);
46956        let c = _mm512_set1_ps(-1.);
46957        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46958            a, b, c, 0,
46959        );
46960        assert_eq_m512(r, c);
46961        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46962            a,
46963            b,
46964            c,
46965            0b00000000_11111111,
46966        );
46967        #[rustfmt::skip]
46968        let e = _mm512_setr_ps(
46969            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46970            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46971            -1., -1., -1., -1.,
46972            -1., -1., -1., -1.,
46973        );
46974        assert_eq_m512(r, e);
46975    }
46976
46977    #[simd_test(enable = "avx512f")]
46978    unsafe fn test_mm512_fmsub_round_ps() {
46979        let a = _mm512_set1_ps(0.00000007);
46980        let b = _mm512_set1_ps(1.);
46981        let c = _mm512_set1_ps(1.);
46982        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46983        let e = _mm512_set1_ps(-0.99999994);
46984        assert_eq_m512(r, e);
46985        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46986        let e = _mm512_set1_ps(-0.9999999);
46987        assert_eq_m512(r, e);
46988    }
46989
46990    #[simd_test(enable = "avx512f")]
46991    unsafe fn test_mm512_mask_fmsub_round_ps() {
46992        let a = _mm512_set1_ps(0.00000007);
46993        let b = _mm512_set1_ps(1.);
46994        let c = _mm512_set1_ps(1.);
46995        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46996            a, 0, b, c,
46997        );
46998        assert_eq_m512(r, a);
46999        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47000            a,
47001            0b00000000_11111111,
47002            b,
47003            c,
47004        );
47005        #[rustfmt::skip]
47006        let e = _mm512_setr_ps(
47007            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47008            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47009            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47010            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47011        );
47012        assert_eq_m512(r, e);
47013    }
47014
47015    #[simd_test(enable = "avx512f")]
47016    unsafe fn test_mm512_maskz_fmsub_round_ps() {
47017        let a = _mm512_set1_ps(0.00000007);
47018        let b = _mm512_set1_ps(1.);
47019        let c = _mm512_set1_ps(1.);
47020        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47021            0, a, b, c,
47022        );
47023        assert_eq_m512(r, _mm512_setzero_ps());
47024        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47025            0b00000000_11111111,
47026            a,
47027            b,
47028            c,
47029        );
47030        #[rustfmt::skip]
47031        let e = _mm512_setr_ps(
47032            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47033            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47034            0., 0., 0., 0.,
47035            0., 0., 0., 0.,
47036        );
47037        assert_eq_m512(r, e);
47038    }
47039
47040    #[simd_test(enable = "avx512f")]
47041    unsafe fn test_mm512_mask3_fmsub_round_ps() {
47042        let a = _mm512_set1_ps(0.00000007);
47043        let b = _mm512_set1_ps(1.);
47044        let c = _mm512_set1_ps(1.);
47045        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47046            a, b, c, 0,
47047        );
47048        assert_eq_m512(r, c);
47049        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47050            a,
47051            b,
47052            c,
47053            0b00000000_11111111,
47054        );
47055        #[rustfmt::skip]
47056        let e = _mm512_setr_ps(
47057            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47058            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47059            1., 1., 1., 1.,
47060            1., 1., 1., 1.,
47061        );
47062        assert_eq_m512(r, e);
47063    }
47064
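    // fmaddsub alternates per lane: even-indexed lanes compute a * b - c and
    // odd-indexed lanes compute a * b + c, which is why the expected vectors
    // alternate between 1.0000001 and -0.99999994.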
47065    #[simd_test(enable = "avx512f")]
47066    unsafe fn test_mm512_fmaddsub_round_ps() {
47067        let a = _mm512_set1_ps(0.00000007);
47068        let b = _mm512_set1_ps(1.);
47069        let c = _mm512_set1_ps(-1.);
47070        let r =
47071            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47072        #[rustfmt::skip]
47073        let e = _mm512_setr_ps(
47074            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47075            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47076            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47077            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47078        );
47079        assert_eq_m512(r, e);
47080        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47081        let e = _mm512_setr_ps(
47082            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47083            -0.9999999, 1., -0.9999999, 1., -0.9999999,
47084        );
47085        assert_eq_m512(r, e);
47086    }
47087
47088    #[simd_test(enable = "avx512f")]
47089    unsafe fn test_mm512_mask_fmaddsub_round_ps() {
47090        let a = _mm512_set1_ps(0.00000007);
47091        let b = _mm512_set1_ps(1.);
47092        let c = _mm512_set1_ps(-1.);
47093        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47094            a, 0, b, c,
47095        );
47096        assert_eq_m512(r, a);
47097        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47098            a,
47099            0b00000000_11111111,
47100            b,
47101            c,
47102        );
47103        #[rustfmt::skip]
47104        let e = _mm512_setr_ps(
47105            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47106            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47107            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47108            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47109        );
47110        assert_eq_m512(r, e);
47111    }
47112
47113    #[simd_test(enable = "avx512f")]
47114    unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
47115        let a = _mm512_set1_ps(0.00000007);
47116        let b = _mm512_set1_ps(1.);
47117        let c = _mm512_set1_ps(-1.);
47118        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47119            0, a, b, c,
47120        );
47121        assert_eq_m512(r, _mm512_setzero_ps());
47122        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47123            0b00000000_11111111,
47124            a,
47125            b,
47126            c,
47127        );
47128        #[rustfmt::skip]
47129        let e = _mm512_setr_ps(
47130            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47131            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47132            0., 0., 0., 0.,
47133            0., 0., 0., 0.,
47134        );
47135        assert_eq_m512(r, e);
47136    }
47137
47138    #[simd_test(enable = "avx512f")]
47139    unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
47140        let a = _mm512_set1_ps(0.00000007);
47141        let b = _mm512_set1_ps(1.);
47142        let c = _mm512_set1_ps(-1.);
47143        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47144            a, b, c, 0,
47145        );
47146        assert_eq_m512(r, c);
47147        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47148            a,
47149            b,
47150            c,
47151            0b00000000_11111111,
47152        );
47153        #[rustfmt::skip]
47154        let e = _mm512_setr_ps(
47155            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47156            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47157            -1., -1., -1., -1.,
47158            -1., -1., -1., -1.,
47159        );
47160        assert_eq_m512(r, e);
47161    }
47162
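    // fmsubadd is the opposite alternation: even-indexed lanes compute a * b + c and
    // odd-indexed lanes compute a * b - c.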
47163    #[simd_test(enable = "avx512f")]
47164    unsafe fn test_mm512_fmsubadd_round_ps() {
47165        let a = _mm512_set1_ps(0.00000007);
47166        let b = _mm512_set1_ps(1.);
47167        let c = _mm512_set1_ps(-1.);
47168        let r =
47169            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47170        #[rustfmt::skip]
47171        let e = _mm512_setr_ps(
47172            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47173            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47174            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47175            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47176        );
47177        assert_eq_m512(r, e);
47178        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47179        let e = _mm512_setr_ps(
47180            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47181            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47182        );
47183        assert_eq_m512(r, e);
47184    }
47185
47186    #[simd_test(enable = "avx512f")]
47187    unsafe fn test_mm512_mask_fmsubadd_round_ps() {
47188        let a = _mm512_set1_ps(0.00000007);
47189        let b = _mm512_set1_ps(1.);
47190        let c = _mm512_set1_ps(-1.);
47191        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47192            a, 0, b, c,
47193        );
47194        assert_eq_m512(r, a);
47195        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47196            a,
47197            0b00000000_11111111,
47198            b,
47199            c,
47200        );
47201        #[rustfmt::skip]
47202        let e = _mm512_setr_ps(
47203            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47204            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47205            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47206            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47207        );
47208        assert_eq_m512(r, e);
47209    }
47210
47211    #[simd_test(enable = "avx512f")]
47212    unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
47213        let a = _mm512_set1_ps(0.00000007);
47214        let b = _mm512_set1_ps(1.);
47215        let c = _mm512_set1_ps(-1.);
47216        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47217            0, a, b, c,
47218        );
47219        assert_eq_m512(r, _mm512_setzero_ps());
47220        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47221            0b00000000_11111111,
47222            a,
47223            b,
47224            c,
47225        );
47226        #[rustfmt::skip]
47227        let e = _mm512_setr_ps(
47228            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47229            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47230            0., 0., 0., 0.,
47231            0., 0., 0., 0.,
47232        );
47233        assert_eq_m512(r, e);
47234    }
47235
47236    #[simd_test(enable = "avx512f")]
47237    unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
47238        let a = _mm512_set1_ps(0.00000007);
47239        let b = _mm512_set1_ps(1.);
47240        let c = _mm512_set1_ps(-1.);
47241        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47242            a, b, c, 0,
47243        );
47244        assert_eq_m512(r, c);
47245        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47246            a,
47247            b,
47248            c,
47249            0b00000000_11111111,
47250        );
47251        #[rustfmt::skip]
47252        let e = _mm512_setr_ps(
47253            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47254            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47255            -1., -1., -1., -1.,
47256            -1., -1., -1., -1.,
47257        );
47258        assert_eq_m512(r, e);
47259    }
47260
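    // fnmadd computes -(a * b) + c, so here it is 1.0 - 0.00000007 and the two
    // rounding modes again split into 0.99999994 and 0.9999999.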
47261    #[simd_test(enable = "avx512f")]
47262    unsafe fn test_mm512_fnmadd_round_ps() {
47263        let a = _mm512_set1_ps(0.00000007);
47264        let b = _mm512_set1_ps(1.);
47265        let c = _mm512_set1_ps(1.);
47266        let r =
47267            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47268        let e = _mm512_set1_ps(0.99999994);
47269        assert_eq_m512(r, e);
47270        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47271        let e = _mm512_set1_ps(0.9999999);
47272        assert_eq_m512(r, e);
47273    }
47274
47275    #[simd_test(enable = "avx512f")]
47276    unsafe fn test_mm512_mask_fnmadd_round_ps() {
47277        let a = _mm512_set1_ps(0.00000007);
47278        let b = _mm512_set1_ps(1.);
47279        let c = _mm512_set1_ps(1.);
47280        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47281            a, 0, b, c,
47282        );
47283        assert_eq_m512(r, a);
47284        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47285            a,
47286            0b00000000_11111111,
47287            b,
47288            c,
47289        );
47290        let e = _mm512_setr_ps(
47291            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47292            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47293            0.00000007, 0.00000007,
47294        );
47295        assert_eq_m512(r, e);
47296    }
47297
47298    #[simd_test(enable = "avx512f")]
47299    unsafe fn test_mm512_maskz_fnmadd_round_ps() {
47300        let a = _mm512_set1_ps(0.00000007);
47301        let b = _mm512_set1_ps(1.);
47302        let c = _mm512_set1_ps(1.);
47303        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47304            0, a, b, c,
47305        );
47306        assert_eq_m512(r, _mm512_setzero_ps());
47307        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47308            0b00000000_11111111,
47309            a,
47310            b,
47311            c,
47312        );
47313        let e = _mm512_setr_ps(
47314            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47315            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47316        );
47317        assert_eq_m512(r, e);
47318    }
47319
47320    #[simd_test(enable = "avx512f")]
47321    unsafe fn test_mm512_mask3_fnmadd_round_ps() {
47322        let a = _mm512_set1_ps(0.00000007);
47323        let b = _mm512_set1_ps(1.);
47324        let c = _mm512_set1_ps(1.);
47325        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47326            a, b, c, 0,
47327        );
47328        assert_eq_m512(r, c);
47329        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47330            a,
47331            b,
47332            c,
47333            0b00000000_11111111,
47334        );
47335        let e = _mm512_setr_ps(
47336            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47337            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
47338        );
47339        assert_eq_m512(r, e);
47340    }
47341
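    // fnmsub computes -(a * b) - c; with c = -1.0 this is the same
    // 1.0 - 0.00000007 as in the fnmadd tests above.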
47342    #[simd_test(enable = "avx512f")]
47343    unsafe fn test_mm512_fnmsub_round_ps() {
47344        let a = _mm512_set1_ps(0.00000007);
47345        let b = _mm512_set1_ps(1.);
47346        let c = _mm512_set1_ps(-1.);
47347        let r =
47348            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47349        let e = _mm512_set1_ps(0.99999994);
47350        assert_eq_m512(r, e);
47351        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47352        let e = _mm512_set1_ps(0.9999999);
47353        assert_eq_m512(r, e);
47354    }
47355
47356    #[simd_test(enable = "avx512f")]
47357    unsafe fn test_mm512_mask_fnmsub_round_ps() {
47358        let a = _mm512_set1_ps(0.00000007);
47359        let b = _mm512_set1_ps(1.);
47360        let c = _mm512_set1_ps(-1.);
47361        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47362            a, 0, b, c,
47363        );
47364        assert_eq_m512(r, a);
47365        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47366            a,
47367            0b00000000_11111111,
47368            b,
47369            c,
47370        );
47371        let e = _mm512_setr_ps(
47372            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47373            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47374            0.00000007, 0.00000007,
47375        );
47376        assert_eq_m512(r, e);
47377    }
47378
47379    #[simd_test(enable = "avx512f")]
47380    unsafe fn test_mm512_maskz_fnmsub_round_ps() {
47381        let a = _mm512_set1_ps(0.00000007);
47382        let b = _mm512_set1_ps(1.);
47383        let c = _mm512_set1_ps(-1.);
47384        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47385            0, a, b, c,
47386        );
47387        assert_eq_m512(r, _mm512_setzero_ps());
47388        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47389            0b00000000_11111111,
47390            a,
47391            b,
47392            c,
47393        );
47394        let e = _mm512_setr_ps(
47395            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47396            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47397        );
47398        assert_eq_m512(r, e);
47399    }
47400
47401    #[simd_test(enable = "avx512f")]
47402    unsafe fn test_mm512_mask3_fnmsub_round_ps() {
47403        let a = _mm512_set1_ps(0.00000007);
47404        let b = _mm512_set1_ps(1.);
47405        let c = _mm512_set1_ps(-1.);
47406        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47407            a, b, c, 0,
47408        );
47409        assert_eq_m512(r, c);
47410        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47411            a,
47412            b,
47413            c,
47414            0b00000000_11111111,
47415        );
47416        let e = _mm512_setr_ps(
47417            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47418            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
47419        );
47420        assert_eq_m512(r, e);
47421    }
47422
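    // max/min never round, so the *_round_ps variants are exercised with
    // _MM_FROUND_CUR_DIRECTION and the tests only check lane-wise selection and
    // mask behavior.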
47423    #[simd_test(enable = "avx512f")]
47424    unsafe fn test_mm512_max_round_ps() {
47425        let a = _mm512_setr_ps(
47426            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47427        );
47428        let b = _mm512_setr_ps(
47429            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47430        );
47431        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47432        let e = _mm512_setr_ps(
47433            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47434        );
47435        assert_eq_m512(r, e);
47436    }
47437
47438    #[simd_test(enable = "avx512f")]
47439    unsafe fn test_mm512_mask_max_round_ps() {
47440        let a = _mm512_setr_ps(
47441            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47442        );
47443        let b = _mm512_setr_ps(
47444            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47445        );
47446        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47447        assert_eq_m512(r, a);
47448        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47449        let e = _mm512_setr_ps(
47450            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47451        );
47452        assert_eq_m512(r, e);
47453    }
47454
47455    #[simd_test(enable = "avx512f")]
47456    unsafe fn test_mm512_maskz_max_round_ps() {
47457        let a = _mm512_setr_ps(
47458            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47459        );
47460        let b = _mm512_setr_ps(
47461            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47462        );
47463        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47464        assert_eq_m512(r, _mm512_setzero_ps());
47465        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47466        let e = _mm512_setr_ps(
47467            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47468        );
47469        assert_eq_m512(r, e);
47470    }
47471
47472    #[simd_test(enable = "avx512f")]
47473    unsafe fn test_mm512_min_round_ps() {
47474        let a = _mm512_setr_ps(
47475            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47476        );
47477        let b = _mm512_setr_ps(
47478            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47479        );
47480        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47481        let e = _mm512_setr_ps(
47482            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
47483        );
47484        assert_eq_m512(r, e);
47485    }
47486
47487    #[simd_test(enable = "avx512f")]
47488    unsafe fn test_mm512_mask_min_round_ps() {
47489        let a = _mm512_setr_ps(
47490            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47491        );
47492        let b = _mm512_setr_ps(
47493            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47494        );
47495        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47496        assert_eq_m512(r, a);
47497        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47498        let e = _mm512_setr_ps(
47499            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47500        );
47501        assert_eq_m512(r, e);
47502    }
47503
47504    #[simd_test(enable = "avx512f")]
47505    unsafe fn test_mm512_maskz_min_round_ps() {
47506        let a = _mm512_setr_ps(
47507            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47508        );
47509        let b = _mm512_setr_ps(
47510            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47511        );
47512        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47513        assert_eq_m512(r, _mm512_setzero_ps());
47514        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47515        let e = _mm512_setr_ps(
47516            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
47517        );
47518        assert_eq_m512(r, e);
47519    }
47520
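    // getexp extracts each element's exponent as a float (floor(log2(|x|)) for
    // normal values): 3.0 = 1.5 * 2^1, so every lane becomes 1.0.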
47521    #[simd_test(enable = "avx512f")]
47522    unsafe fn test_mm512_getexp_round_ps() {
47523        let a = _mm512_set1_ps(3.);
47524        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
47525        let e = _mm512_set1_ps(1.);
47526        assert_eq_m512(r, e);
47527    }
47528
47529    #[simd_test(enable = "avx512f")]
47530    unsafe fn test_mm512_mask_getexp_round_ps() {
47531        let a = _mm512_set1_ps(3.);
47532        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
47533        assert_eq_m512(r, a);
47534        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
47535        let e = _mm512_setr_ps(
47536            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47537        );
47538        assert_eq_m512(r, e);
47539    }
47540
47541    #[simd_test(enable = "avx512f")]
47542    unsafe fn test_mm512_maskz_getexp_round_ps() {
47543        let a = _mm512_set1_ps(3.);
47544        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
47545        assert_eq_m512(r, _mm512_setzero_ps());
47546        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
47547        let e = _mm512_setr_ps(
47548            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47549        );
47550        assert_eq_m512(r, e);
47551    }
47552
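    // roundscale with imm8 = 0 rounds to the nearest integer keeping no fractional
    // bits, so 1.1 becomes 1.0.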
47553    #[simd_test(enable = "avx512f")]
47554    unsafe fn test_mm512_roundscale_round_ps() {
47555        let a = _mm512_set1_ps(1.1);
47556        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
47557        let e = _mm512_set1_ps(1.0);
47558        assert_eq_m512(r, e);
47559    }
47560
47561    #[simd_test(enable = "avx512f")]
47562    unsafe fn test_mm512_mask_roundscale_round_ps() {
47563        let a = _mm512_set1_ps(1.1);
47564        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
47565        let e = _mm512_set1_ps(1.1);
47566        assert_eq_m512(r, e);
47567        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
47568            a,
47569            0b11111111_11111111,
47570            a,
47571        );
47572        let e = _mm512_set1_ps(1.0);
47573        assert_eq_m512(r, e);
47574    }
47575
47576    #[simd_test(enable = "avx512f")]
47577    unsafe fn test_mm512_maskz_roundscale_round_ps() {
47578        let a = _mm512_set1_ps(1.1);
47579        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
47580        assert_eq_m512(r, _mm512_setzero_ps());
47581        let r =
47582            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
47583        let e = _mm512_set1_ps(1.0);
47584        assert_eq_m512(r, e);
47585    }
47586
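    // scalef computes a * 2^floor(b): scalef(1.0, 3.0) = 8.0. Note that the masked
    // tests build their expected vectors with _mm512_set_ps (reversed lane order),
    // unlike most neighboring tests, which use _mm512_setr_ps.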
47587    #[simd_test(enable = "avx512f")]
47588    unsafe fn test_mm512_scalef_round_ps() {
47589        let a = _mm512_set1_ps(1.);
47590        let b = _mm512_set1_ps(3.);
47591        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
47592        let e = _mm512_set1_ps(8.);
47593        assert_eq_m512(r, e);
47594    }
47595
47596    #[simd_test(enable = "avx512f")]
47597    unsafe fn test_mm512_mask_scalef_round_ps() {
47598        let a = _mm512_set1_ps(1.);
47599        let b = _mm512_set1_ps(3.);
47600        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47601            a, 0, a, b,
47602        );
47603        assert_eq_m512(r, a);
47604        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47605            a,
47606            0b11111111_00000000,
47607            a,
47608            b,
47609        );
47610        let e = _mm512_set_ps(
47611            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
47612        );
47613        assert_eq_m512(r, e);
47614    }
47615
47616    #[simd_test(enable = "avx512f")]
47617    unsafe fn test_mm512_maskz_scalef_round_ps() {
47618        let a = _mm512_set1_ps(1.);
47619        let b = _mm512_set1_ps(3.);
47620        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47621            0, a, b,
47622        );
47623        assert_eq_m512(r, _mm512_setzero_ps());
47624        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47625            0b11111111_00000000,
47626            a,
47627            b,
47628        );
47629        let e = _mm512_set_ps(
47630            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47631        );
47632        assert_eq_m512(r, e);
47633    }
47634
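    // The fixupimm tests feed NaN (and f32::MAX) inputs through an all-ones fixup
    // table in `c` and expect the affected lanes to come out as +0.0; the masked
    // variants then choose between the fixed-up value, the original `a`, and zero.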
47635    #[simd_test(enable = "avx512f")]
47636    unsafe fn test_mm512_fixupimm_round_ps() {
47637        let a = _mm512_set1_ps(f32::NAN);
47638        let b = _mm512_set1_ps(f32::MAX);
47639        let c = _mm512_set1_epi32(i32::MAX);
47640        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
47641        let e = _mm512_set1_ps(0.0);
47642        assert_eq_m512(r, e);
47643    }
47644
47645    #[simd_test(enable = "avx512f")]
47646    unsafe fn test_mm512_mask_fixupimm_round_ps() {
47647        #[rustfmt::skip]
47648        let a = _mm512_set_ps(
47649            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47650            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47651            1., 1., 1., 1.,
47652            1., 1., 1., 1.,
47653        );
47654        let b = _mm512_set1_ps(f32::MAX);
47655        let c = _mm512_set1_epi32(i32::MAX);
47656        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47657            a,
47658            0b11111111_00000000,
47659            b,
47660            c,
47661        );
47662        let e = _mm512_set_ps(
47663            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47664        );
47665        assert_eq_m512(r, e);
47666    }
47667
47668    #[simd_test(enable = "avx512f")]
47669    unsafe fn test_mm512_maskz_fixupimm_round_ps() {
47670        #[rustfmt::skip]
47671        let a = _mm512_set_ps(
47672            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47673            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47674            1., 1., 1., 1.,
47675            1., 1., 1., 1.,
47676        );
47677        let b = _mm512_set1_ps(f32::MAX);
47678        let c = _mm512_set1_epi32(i32::MAX);
47679        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47680            0b11111111_00000000,
47681            a,
47682            b,
47683            c,
47684        );
47685        let e = _mm512_set_ps(
47686            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
47687        );
47688        assert_eq_m512(r, e);
47689    }
47690
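    // getmant with _MM_MANT_NORM_1_2 normalizes the mantissa into [1, 2):
    // 10.0 = 1.25 * 2^3, so every lane becomes 1.25.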
47691    #[simd_test(enable = "avx512f")]
47692    unsafe fn test_mm512_getmant_round_ps() {
47693        let a = _mm512_set1_ps(10.);
47694        let r = _mm512_getmant_round_ps::<
47695            _MM_MANT_NORM_1_2,
47696            _MM_MANT_SIGN_SRC,
47697            _MM_FROUND_CUR_DIRECTION,
47698        >(a);
47699        let e = _mm512_set1_ps(1.25);
47700        assert_eq_m512(r, e);
47701    }
47702
47703    #[simd_test(enable = "avx512f")]
47704    unsafe fn test_mm512_mask_getmant_round_ps() {
47705        let a = _mm512_set1_ps(10.);
47706        let r = _mm512_mask_getmant_round_ps::<
47707            _MM_MANT_NORM_1_2,
47708            _MM_MANT_SIGN_SRC,
47709            _MM_FROUND_CUR_DIRECTION,
47710        >(a, 0, a);
47711        assert_eq_m512(r, a);
47712        let r = _mm512_mask_getmant_round_ps::<
47713            _MM_MANT_NORM_1_2,
47714            _MM_MANT_SIGN_SRC,
47715            _MM_FROUND_CUR_DIRECTION,
47716        >(a, 0b11111111_00000000, a);
47717        let e = _mm512_setr_ps(
47718            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47719        );
47720        assert_eq_m512(r, e);
47721    }
47722
47723    #[simd_test(enable = "avx512f")]
47724    unsafe fn test_mm512_maskz_getmant_round_ps() {
47725        let a = _mm512_set1_ps(10.);
47726        let r = _mm512_maskz_getmant_round_ps::<
47727            _MM_MANT_NORM_1_2,
47728            _MM_MANT_SIGN_SRC,
47729            _MM_FROUND_CUR_DIRECTION,
47730        >(0, a);
47731        assert_eq_m512(r, _mm512_setzero_ps());
47732        let r = _mm512_maskz_getmant_round_ps::<
47733            _MM_MANT_NORM_1_2,
47734            _MM_MANT_SIGN_SRC,
47735            _MM_FROUND_CUR_DIRECTION,
47736        >(0b11111111_00000000, a);
47737        let e = _mm512_setr_ps(
47738            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47739        );
47740        assert_eq_m512(r, e);
47741    }
47742
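    // cvtps_epi32 converts with the current rounding mode (round-to-nearest-even by
    // default), so -3.5 -> -4, -5.5 -> -6 and 9.5 -> 10 in the expected vectors.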
47743    #[simd_test(enable = "avx512f")]
47744    unsafe fn test_mm512_cvtps_epi32() {
47745        let a = _mm512_setr_ps(
47746            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47747        );
47748        let r = _mm512_cvtps_epi32(a);
47749        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47750        assert_eq_m512i(r, e);
47751    }
47752
47753    #[simd_test(enable = "avx512f")]
47754    unsafe fn test_mm512_mask_cvtps_epi32() {
47755        let a = _mm512_setr_ps(
47756            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47757        );
47758        let src = _mm512_set1_epi32(0);
47759        let r = _mm512_mask_cvtps_epi32(src, 0, a);
47760        assert_eq_m512i(r, src);
47761        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
47762        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47763        assert_eq_m512i(r, e);
47764    }
47765
47766    #[simd_test(enable = "avx512f")]
47767    unsafe fn test_mm512_maskz_cvtps_epi32() {
47768        let a = _mm512_setr_ps(
47769            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47770        );
47771        let r = _mm512_maskz_cvtps_epi32(0, a);
47772        assert_eq_m512i(r, _mm512_setzero_si512());
47773        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
47774        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47775        assert_eq_m512i(r, e);
47776    }
47777
47778    #[simd_test(enable = "avx512f,avx512vl")]
47779    unsafe fn test_mm256_mask_cvtps_epi32() {
47780        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47781        let src = _mm256_set1_epi32(0);
47782        let r = _mm256_mask_cvtps_epi32(src, 0, a);
47783        assert_eq_m256i(r, src);
47784        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
47785        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47786        assert_eq_m256i(r, e);
47787    }
47788
47789    #[simd_test(enable = "avx512f,avx512vl")]
47790    unsafe fn test_mm256_maskz_cvtps_epi32() {
47791        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47792        let r = _mm256_maskz_cvtps_epi32(0, a);
47793        assert_eq_m256i(r, _mm256_setzero_si256());
47794        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
47795        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47796        assert_eq_m256i(r, e);
47797    }
47798
47799    #[simd_test(enable = "avx512f,avx512vl")]
47800    unsafe fn test_mm_mask_cvtps_epi32() {
47801        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47802        let src = _mm_set1_epi32(0);
47803        let r = _mm_mask_cvtps_epi32(src, 0, a);
47804        assert_eq_m128i(r, src);
47805        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
47806        let e = _mm_set_epi32(12, 14, 14, 16);
47807        assert_eq_m128i(r, e);
47808    }
47809
47810    #[simd_test(enable = "avx512f,avx512vl")]
47811    unsafe fn test_mm_maskz_cvtps_epi32() {
47812        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47813        let r = _mm_maskz_cvtps_epi32(0, a);
47814        assert_eq_m128i(r, _mm_setzero_si128());
47815        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
47816        let e = _mm_set_epi32(12, 14, 14, 16);
47817        assert_eq_m128i(r, e);
47818    }
47819
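    // cvtps_epu32: negative inputs are out of range for an unsigned destination and
    // produce the all-ones pattern 0xFFFF_FFFF, which reads as -1 in the signed
    // _mm512_setr_epi32 expected values.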
47820    #[simd_test(enable = "avx512f")]
47821    unsafe fn test_mm512_cvtps_epu32() {
47822        let a = _mm512_setr_ps(
47823            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47824        );
47825        let r = _mm512_cvtps_epu32(a);
47826        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47827        assert_eq_m512i(r, e);
47828    }
47829
47830    #[simd_test(enable = "avx512f")]
47831    unsafe fn test_mm512_mask_cvtps_epu32() {
47832        let a = _mm512_setr_ps(
47833            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47834        );
47835        let src = _mm512_set1_epi32(0);
47836        let r = _mm512_mask_cvtps_epu32(src, 0, a);
47837        assert_eq_m512i(r, src);
47838        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
47839        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47840        assert_eq_m512i(r, e);
47841    }
47842
47843    #[simd_test(enable = "avx512f")]
47844    unsafe fn test_mm512_maskz_cvtps_epu32() {
47845        let a = _mm512_setr_ps(
47846            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47847        );
47848        let r = _mm512_maskz_cvtps_epu32(0, a);
47849        assert_eq_m512i(r, _mm512_setzero_si512());
47850        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
47851        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47852        assert_eq_m512i(r, e);
47853    }
47854
47855    #[simd_test(enable = "avx512f,avx512vl")]
47856    unsafe fn test_mm256_cvtps_epu32() {
47857        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47858        let r = _mm256_cvtps_epu32(a);
47859        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47860        assert_eq_m256i(r, e);
47861    }
47862
47863    #[simd_test(enable = "avx512f,avx512vl")]
47864    unsafe fn test_mm256_mask_cvtps_epu32() {
47865        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47866        let src = _mm256_set1_epi32(0);
47867        let r = _mm256_mask_cvtps_epu32(src, 0, a);
47868        assert_eq_m256i(r, src);
47869        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
47870        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47871        assert_eq_m256i(r, e);
47872    }
47873
47874    #[simd_test(enable = "avx512f,avx512vl")]
47875    unsafe fn test_mm256_maskz_cvtps_epu32() {
47876        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47877        let r = _mm256_maskz_cvtps_epu32(0, a);
47878        assert_eq_m256i(r, _mm256_setzero_si256());
47879        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
47880        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47881        assert_eq_m256i(r, e);
47882    }
47883
47884    #[simd_test(enable = "avx512f,avx512vl")]
47885    unsafe fn test_mm_cvtps_epu32() {
47886        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47887        let r = _mm_cvtps_epu32(a);
47888        let e = _mm_set_epi32(12, 14, 14, 16);
47889        assert_eq_m128i(r, e);
47890    }
47891
47892    #[simd_test(enable = "avx512f,avx512vl")]
47893    unsafe fn test_mm_mask_cvtps_epu32() {
47894        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47895        let src = _mm_set1_epi32(0);
47896        let r = _mm_mask_cvtps_epu32(src, 0, a);
47897        assert_eq_m128i(r, src);
47898        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
47899        let e = _mm_set_epi32(12, 14, 14, 16);
47900        assert_eq_m128i(r, e);
47901    }
47902
47903    #[simd_test(enable = "avx512f,avx512vl")]
47904    unsafe fn test_mm_maskz_cvtps_epu32() {
47905        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47906        let r = _mm_maskz_cvtps_epu32(0, a);
47907        assert_eq_m128i(r, _mm_setzero_si128());
47908        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
47909        let e = _mm_set_epi32(12, 14, 14, 16);
47910        assert_eq_m128i(r, e);
47911    }
47912
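    // cvtepi8_epi32 sign-extends each byte to 32 bits; the mask/maskz variants copy
    // inactive lanes from an all -1 source or zero them.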
47913    #[simd_test(enable = "avx512f")]
47914    unsafe fn test_mm512_cvtepi8_epi32() {
47915        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47916        let r = _mm512_cvtepi8_epi32(a);
47917        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47918        assert_eq_m512i(r, e);
47919    }
47920
47921    #[simd_test(enable = "avx512f")]
47922    unsafe fn test_mm512_mask_cvtepi8_epi32() {
47923        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47924        let src = _mm512_set1_epi32(-1);
47925        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
47926        assert_eq_m512i(r, src);
47927        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
47928        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47929        assert_eq_m512i(r, e);
47930    }
47931
47932    #[simd_test(enable = "avx512f")]
47933    unsafe fn test_mm512_maskz_cvtepi8_epi32() {
47934        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47935        let r = _mm512_maskz_cvtepi8_epi32(0, a);
47936        assert_eq_m512i(r, _mm512_setzero_si512());
47937        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
47938        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47939        assert_eq_m512i(r, e);
47940    }
47941
47942    #[simd_test(enable = "avx512f,avx512vl")]
47943    unsafe fn test_mm256_mask_cvtepi8_epi32() {
47944        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47945        let src = _mm256_set1_epi32(-1);
47946        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
47947        assert_eq_m256i(r, src);
47948        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
47949        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47950        assert_eq_m256i(r, e);
47951    }
47952
47953    #[simd_test(enable = "avx512f,avx512vl")]
47954    unsafe fn test_mm256_maskz_cvtepi8_epi32() {
47955        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47956        let r = _mm256_maskz_cvtepi8_epi32(0, a);
47957        assert_eq_m256i(r, _mm256_setzero_si256());
47958        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
47959        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47960        assert_eq_m256i(r, e);
47961    }
47962
47963    #[simd_test(enable = "avx512f,avx512vl")]
47964    unsafe fn test_mm_mask_cvtepi8_epi32() {
47965        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47966        let src = _mm_set1_epi32(-1);
47967        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
47968        assert_eq_m128i(r, src);
47969        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
47970        let e = _mm_set_epi32(12, 13, 14, 15);
47971        assert_eq_m128i(r, e);
47972    }
47973
47974    #[simd_test(enable = "avx512f,avx512vl")]
47975    unsafe fn test_mm_maskz_cvtepi8_epi32() {
47976        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47977        let r = _mm_maskz_cvtepi8_epi32(0, a);
47978        assert_eq_m128i(r, _mm_setzero_si128());
47979        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
47980        let e = _mm_set_epi32(12, 13, 14, 15);
47981        assert_eq_m128i(r, e);
47982    }
47983
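    // cvtepu8_epi32 zero-extends each byte; with these small non-negative inputs the
    // converted values match the sign-extending tests above, so the masking behavior
    // is the real point of the mask/maskz variants.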
47984    #[simd_test(enable = "avx512f")]
47985    unsafe fn test_mm512_cvtepu8_epi32() {
47986        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47987        let r = _mm512_cvtepu8_epi32(a);
47988        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47989        assert_eq_m512i(r, e);
47990    }
47991
47992    #[simd_test(enable = "avx512f")]
47993    unsafe fn test_mm512_mask_cvtepu8_epi32() {
47994        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47995        let src = _mm512_set1_epi32(-1);
47996        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
47997        assert_eq_m512i(r, src);
47998        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
47999        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48000        assert_eq_m512i(r, e);
48001    }
48002
48003    #[simd_test(enable = "avx512f")]
48004    unsafe fn test_mm512_maskz_cvtepu8_epi32() {
48005        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48006        let r = _mm512_maskz_cvtepu8_epi32(0, a);
48007        assert_eq_m512i(r, _mm512_setzero_si512());
48008        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
48009        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48010        assert_eq_m512i(r, e);
48011    }
48012
48013    #[simd_test(enable = "avx512f,avx512vl")]
48014    unsafe fn test_mm256_mask_cvtepu8_epi32() {
48015        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48016        let src = _mm256_set1_epi32(-1);
48017        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
48018        assert_eq_m256i(r, src);
48019        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
48020        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48021        assert_eq_m256i(r, e);
48022    }
48023
48024    #[simd_test(enable = "avx512f,avx512vl")]
48025    unsafe fn test_mm256_maskz_cvtepu8_epi32() {
48026        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48027        let r = _mm256_maskz_cvtepu8_epi32(0, a);
48028        assert_eq_m256i(r, _mm256_setzero_si256());
48029        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
48030        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48031        assert_eq_m256i(r, e);
48032    }
48033
48034    #[simd_test(enable = "avx512f,avx512vl")]
48035    unsafe fn test_mm_mask_cvtepu8_epi32() {
48036        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48037        let src = _mm_set1_epi32(-1);
48038        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
48039        assert_eq_m128i(r, src);
48040        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
48041        let e = _mm_set_epi32(12, 13, 14, 15);
48042        assert_eq_m128i(r, e);
48043    }
48044
48045    #[simd_test(enable = "avx512f,avx512vl")]
48046    unsafe fn test_mm_maskz_cvtepu8_epi32() {
48047        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48048        let r = _mm_maskz_cvtepu8_epi32(0, a);
48049        assert_eq_m128i(r, _mm_setzero_si128());
48050        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
48051        let e = _mm_set_epi32(12, 13, 14, 15);
48052        assert_eq_m128i(r, e);
48053    }
48054
48055    #[simd_test(enable = "avx512f")]
48056    unsafe fn test_mm512_cvtepi16_epi32() {
48057        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48058        let r = _mm512_cvtepi16_epi32(a);
48059        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48060        assert_eq_m512i(r, e);
48061    }
48062
48063    #[simd_test(enable = "avx512f")]
48064    unsafe fn test_mm512_mask_cvtepi16_epi32() {
48065        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48066        let src = _mm512_set1_epi32(-1);
48067        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
48068        assert_eq_m512i(r, src);
48069        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
48070        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48071        assert_eq_m512i(r, e);
48072    }
48073
48074    #[simd_test(enable = "avx512f")]
48075    unsafe fn test_mm512_maskz_cvtepi16_epi32() {
48076        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48077        let r = _mm512_maskz_cvtepi16_epi32(0, a);
48078        assert_eq_m512i(r, _mm512_setzero_si512());
48079        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
48080        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48081        assert_eq_m512i(r, e);
48082    }
48083
48084    #[simd_test(enable = "avx512f,avx512vl")]
48085    unsafe fn test_mm256_mask_cvtepi16_epi32() {
48086        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48087        let src = _mm256_set1_epi32(-1);
48088        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
48089        assert_eq_m256i(r, src);
48090        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
48091        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48092        assert_eq_m256i(r, e);
48093    }
48094
48095    #[simd_test(enable = "avx512f,avx512vl")]
48096    unsafe fn test_mm256_maskz_cvtepi16_epi32() {
48097        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48098        let r = _mm256_maskz_cvtepi16_epi32(0, a);
48099        assert_eq_m256i(r, _mm256_setzero_si256());
48100        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
48101        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48102        assert_eq_m256i(r, e);
48103    }
48104
48105    #[simd_test(enable = "avx512f,avx512vl")]
48106    unsafe fn test_mm_mask_cvtepi16_epi32() {
48107        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48108        let src = _mm_set1_epi32(-1);
48109        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
48110        assert_eq_m128i(r, src);
48111        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
48112        let e = _mm_set_epi32(4, 5, 6, 7);
48113        assert_eq_m128i(r, e);
48114    }
48115
48116    #[simd_test(enable = "avx512f,avx512vl")]
48117    unsafe fn test_mm_maskz_cvtepi16_epi32() {
48118        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48119        let r = _mm_maskz_cvtepi16_epi32(0, a);
48120        assert_eq_m128i(r, _mm_setzero_si128());
48121        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
48122        let e = _mm_set_epi32(4, 5, 6, 7);
48123        assert_eq_m128i(r, e);
48124    }
48125
48126    #[simd_test(enable = "avx512f")]
48127    unsafe fn test_mm512_cvtepu16_epi32() {
48128        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48129        let r = _mm512_cvtepu16_epi32(a);
48130        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48131        assert_eq_m512i(r, e);
48132    }
48133
48134    #[simd_test(enable = "avx512f")]
48135    unsafe fn test_mm512_mask_cvtepu16_epi32() {
48136        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48137        let src = _mm512_set1_epi32(-1);
48138        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
48139        assert_eq_m512i(r, src);
48140        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
48141        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48142        assert_eq_m512i(r, e);
48143    }
48144
48145    #[simd_test(enable = "avx512f")]
48146    unsafe fn test_mm512_maskz_cvtepu16_epi32() {
48147        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48148        let r = _mm512_maskz_cvtepu16_epi32(0, a);
48149        assert_eq_m512i(r, _mm512_setzero_si512());
48150        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
48151        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48152        assert_eq_m512i(r, e);
48153    }
48154
48155    #[simd_test(enable = "avx512f,avx512vl")]
48156    unsafe fn test_mm256_mask_cvtepu16_epi32() {
48157        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48158        let src = _mm256_set1_epi32(-1);
48159        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
48160        assert_eq_m256i(r, src);
48161        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
48162        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48163        assert_eq_m256i(r, e);
48164    }
48165
48166    #[simd_test(enable = "avx512f,avx512vl")]
48167    unsafe fn test_mm256_maskz_cvtepu16_epi32() {
48168        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48169        let r = _mm256_maskz_cvtepu16_epi32(0, a);
48170        assert_eq_m256i(r, _mm256_setzero_si256());
48171        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
48172        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48173        assert_eq_m256i(r, e);
48174    }
48175
48176    #[simd_test(enable = "avx512f,avx512vl")]
48177    unsafe fn test_mm_mask_cvtepu16_epi32() {
48178        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48179        let src = _mm_set1_epi32(-1);
48180        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
48181        assert_eq_m128i(r, src);
48182        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
48183        let e = _mm_set_epi32(12, 13, 14, 15);
48184        assert_eq_m128i(r, e);
48185    }
48186
48187    #[simd_test(enable = "avx512f,avx512vl")]
48188    unsafe fn test_mm_maskz_cvtepu16_epi32() {
48189        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48190        let r = _mm_maskz_cvtepu16_epi32(0, a);
48191        assert_eq_m128i(r, _mm_setzero_si128());
48192        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
48193        let e = _mm_set_epi32(12, 13, 14, 15);
48194        assert_eq_m128i(r, e);
48195    }
48196
48197    #[simd_test(enable = "avx512f")]
48198    unsafe fn test_mm512_cvtepi32_ps() {
48199        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48200        let r = _mm512_cvtepi32_ps(a);
48201        let e = _mm512_set_ps(
48202            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48203        );
48204        assert_eq_m512(r, e);
48205    }
48206
48207    #[simd_test(enable = "avx512f")]
48208    unsafe fn test_mm512_mask_cvtepi32_ps() {
48209        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48210        let src = _mm512_set1_ps(-1.);
48211        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
48212        assert_eq_m512(r, src);
48213        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
48214        let e = _mm512_set_ps(
48215            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48216        );
48217        assert_eq_m512(r, e);
48218    }
48219
48220    #[simd_test(enable = "avx512f")]
48221    unsafe fn test_mm512_maskz_cvtepi32_ps() {
48222        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48223        let r = _mm512_maskz_cvtepi32_ps(0, a);
48224        assert_eq_m512(r, _mm512_setzero_ps());
48225        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
48226        let e = _mm512_set_ps(
48227            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48228        );
48229        assert_eq_m512(r, e);
48230    }
48231
48232    #[simd_test(enable = "avx512f,avx512vl")]
48233    unsafe fn test_mm256_mask_cvtepi32_ps() {
48234        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48235        let src = _mm256_set1_ps(-1.);
48236        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
48237        assert_eq_m256(r, src);
48238        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
48239        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48240        assert_eq_m256(r, e);
48241    }
48242
48243    #[simd_test(enable = "avx512f,avx512vl")]
48244    unsafe fn test_mm256_maskz_cvtepi32_ps() {
48245        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48246        let r = _mm256_maskz_cvtepi32_ps(0, a);
48247        assert_eq_m256(r, _mm256_setzero_ps());
48248        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
48249        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48250        assert_eq_m256(r, e);
48251    }
48252
48253    #[simd_test(enable = "avx512f,avx512vl")]
48254    unsafe fn test_mm_mask_cvtepi32_ps() {
48255        let a = _mm_set_epi32(1, 2, 3, 4);
48256        let src = _mm_set1_ps(-1.);
48257        let r = _mm_mask_cvtepi32_ps(src, 0, a);
48258        assert_eq_m128(r, src);
48259        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
48260        let e = _mm_set_ps(1., 2., 3., 4.);
48261        assert_eq_m128(r, e);
48262    }
48263
48264    #[simd_test(enable = "avx512f,avx512vl")]
48265    unsafe fn test_mm_maskz_cvtepi32_ps() {
48266        let a = _mm_set_epi32(1, 2, 3, 4);
48267        let r = _mm_maskz_cvtepi32_ps(0, a);
48268        assert_eq_m128(r, _mm_setzero_ps());
48269        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
48270        let e = _mm_set_ps(1., 2., 3., 4.);
48271        assert_eq_m128(r, e);
48272    }
48273
48274    #[simd_test(enable = "avx512f")]
48275    unsafe fn test_mm512_cvtepu32_ps() {
48276        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48277        let r = _mm512_cvtepu32_ps(a);
48278        let e = _mm512_set_ps(
48279            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48280        );
48281        assert_eq_m512(r, e);
48282    }
48283
48284    #[simd_test(enable = "avx512f")]
48285    unsafe fn test_mm512_mask_cvtepu32_ps() {
48286        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48287        let src = _mm512_set1_ps(-1.);
48288        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
48289        assert_eq_m512(r, src);
48290        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
48291        let e = _mm512_set_ps(
48292            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48293        );
48294        assert_eq_m512(r, e);
48295    }
48296
48297    #[simd_test(enable = "avx512f")]
48298    unsafe fn test_mm512_maskz_cvtepu32_ps() {
48299        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48300        let r = _mm512_maskz_cvtepu32_ps(0, a);
48301        assert_eq_m512(r, _mm512_setzero_ps());
48302        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
48303        let e = _mm512_set_ps(
48304            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48305        );
48306        assert_eq_m512(r, e);
48307    }
48308
48309    #[simd_test(enable = "avx512f")]
48310    unsafe fn test_mm512_cvtepi32_epi16() {
48311        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48312        let r = _mm512_cvtepi32_epi16(a);
48313        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48314        assert_eq_m256i(r, e);
48315    }
48316
48317    #[simd_test(enable = "avx512f")]
48318    unsafe fn test_mm512_mask_cvtepi32_epi16() {
48319        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48320        let src = _mm256_set1_epi16(-1);
48321        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
48322        assert_eq_m256i(r, src);
48323        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
48324        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48325        assert_eq_m256i(r, e);
48326    }
48327
48328    #[simd_test(enable = "avx512f")]
48329    unsafe fn test_mm512_maskz_cvtepi32_epi16() {
48330        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48331        let r = _mm512_maskz_cvtepi32_epi16(0, a);
48332        assert_eq_m256i(r, _mm256_setzero_si256());
48333        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
48334        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48335        assert_eq_m256i(r, e);
48336    }
48337
48338    #[simd_test(enable = "avx512f,avx512vl")]
48339    unsafe fn test_mm256_cvtepi32_epi16() {
48340        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48341        let r = _mm256_cvtepi32_epi16(a);
48342        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48343        assert_eq_m128i(r, e);
48344    }
48345
48346    #[simd_test(enable = "avx512f,avx512vl")]
48347    unsafe fn test_mm256_mask_cvtepi32_epi16() {
48348        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48349        let src = _mm_set1_epi16(-1);
48350        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
48351        assert_eq_m128i(r, src);
48352        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
48353        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48354        assert_eq_m128i(r, e);
48355    }
48356
48357    #[simd_test(enable = "avx512f,avx512vl")]
48358    unsafe fn test_mm256_maskz_cvtepi32_epi16() {
48359        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48360        let r = _mm256_maskz_cvtepi32_epi16(0, a);
48361        assert_eq_m128i(r, _mm_setzero_si128());
48362        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
48363        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48364        assert_eq_m128i(r, e);
48365    }
48366
48367    #[simd_test(enable = "avx512f,avx512vl")]
48368    unsafe fn test_mm_cvtepi32_epi16() {
48369        let a = _mm_set_epi32(4, 5, 6, 7);
48370        let r = _mm_cvtepi32_epi16(a);
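        // A minimal note: the 128-bit truncating narrow fills only the low four i16 lanes;
        // the upper half of the result is zeroed, as the expected vector below shows.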
48371        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48372        assert_eq_m128i(r, e);
48373    }
48374
48375    #[simd_test(enable = "avx512f,avx512vl")]
48376    unsafe fn test_mm_mask_cvtepi32_epi16() {
48377        let a = _mm_set_epi32(4, 5, 6, 7);
48378        let src = _mm_set1_epi16(0);
48379        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
48380        assert_eq_m128i(r, src);
48381        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
48382        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48383        assert_eq_m128i(r, e);
48384    }
48385
48386    #[simd_test(enable = "avx512f,avx512vl")]
48387    unsafe fn test_mm_maskz_cvtepi32_epi16() {
48388        let a = _mm_set_epi32(4, 5, 6, 7);
48389        let r = _mm_maskz_cvtepi32_epi16(0, a);
48390        assert_eq_m128i(r, _mm_setzero_si128());
48391        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
48392        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48393        assert_eq_m128i(r, e);
48394    }
48395
48396    #[simd_test(enable = "avx512f")]
48397    unsafe fn test_mm512_cvtepi32_epi8() {
48398        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48399        let r = _mm512_cvtepi32_epi8(a);
48400        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48401        assert_eq_m128i(r, e);
48402    }
48403
48404    #[simd_test(enable = "avx512f")]
48405    unsafe fn test_mm512_mask_cvtepi32_epi8() {
48406        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48407        let src = _mm_set1_epi8(-1);
48408        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
48409        assert_eq_m128i(r, src);
48410        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
48411        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48412        assert_eq_m128i(r, e);
48413    }
48414
48415    #[simd_test(enable = "avx512f")]
48416    unsafe fn test_mm512_maskz_cvtepi32_epi8() {
48417        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48418        let r = _mm512_maskz_cvtepi32_epi8(0, a);
48419        assert_eq_m128i(r, _mm_setzero_si128());
48420        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
48421        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48422        assert_eq_m128i(r, e);
48423    }
48424
48425    #[simd_test(enable = "avx512f,avx512vl")]
48426    unsafe fn test_mm256_cvtepi32_epi8() {
48427        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48428        let r = _mm256_cvtepi32_epi8(a);
48429        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48430        assert_eq_m128i(r, e);
48431    }
48432
48433    #[simd_test(enable = "avx512f,avx512vl")]
48434    unsafe fn test_mm256_mask_cvtepi32_epi8() {
48435        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48436        let src = _mm_set1_epi8(0);
48437        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
48438        assert_eq_m128i(r, src);
48439        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
48440        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48441        assert_eq_m128i(r, e);
48442    }
48443
48444    #[simd_test(enable = "avx512f,avx512vl")]
48445    unsafe fn test_mm256_maskz_cvtepi32_epi8() {
48446        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48447        let r = _mm256_maskz_cvtepi32_epi8(0, a);
48448        assert_eq_m128i(r, _mm_setzero_si128());
48449        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
48450        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48451        assert_eq_m128i(r, e);
48452    }
48453
48454    #[simd_test(enable = "avx512f,avx512vl")]
48455    unsafe fn test_mm_cvtepi32_epi8() {
48456        let a = _mm_set_epi32(4, 5, 6, 7);
48457        let r = _mm_cvtepi32_epi8(a);
48458        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48459        assert_eq_m128i(r, e);
48460    }
48461
48462    #[simd_test(enable = "avx512f,avx512vl")]
48463    unsafe fn test_mm_mask_cvtepi32_epi8() {
48464        let a = _mm_set_epi32(4, 5, 6, 7);
48465        let src = _mm_set1_epi8(0);
48466        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
48467        assert_eq_m128i(r, src);
48468        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
48469        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48470        assert_eq_m128i(r, e);
48471    }
48472
48473    #[simd_test(enable = "avx512f,avx512vl")]
48474    unsafe fn test_mm_maskz_cvtepi32_epi8() {
48475        let a = _mm_set_epi32(4, 5, 6, 7);
48476        let r = _mm_maskz_cvtepi32_epi8(0, a);
48477        assert_eq_m128i(r, _mm_setzero_si128());
48478        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
48479        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48480        assert_eq_m128i(r, e);
48481    }
48482
48483    #[simd_test(enable = "avx512f")]
48484    unsafe fn test_mm512_cvtsepi32_epi16() {
48485        #[rustfmt::skip]
48486        let a = _mm512_set_epi32(
48487            0, 1, 2, 3,
48488            4, 5, 6, 7,
48489            8, 9, 10, 11,
48490            12, 13, i32::MIN, i32::MAX,
48491        );
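        // Signed saturation clamps i32::MIN and i32::MAX to i16::MIN and i16::MAX.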
48492        let r = _mm512_cvtsepi32_epi16(a);
48493        #[rustfmt::skip]
48494        let e = _mm256_set_epi16(
48495            0, 1, 2, 3,
48496            4, 5, 6, 7,
48497            8, 9, 10, 11,
48498            12, 13, i16::MIN, i16::MAX,
48499        );
48500        assert_eq_m256i(r, e);
48501    }
48502
48503    #[simd_test(enable = "avx512f")]
48504    unsafe fn test_mm512_mask_cvtsepi32_epi16() {
48505        #[rustfmt::skip]
48506        let a = _mm512_set_epi32(
48507            0, 1, 2, 3,
48508            4, 5, 6, 7,
48509            8, 9, 10, 11,
48510            12, 13, i32::MIN, i32::MAX,
48511        );
48512        let src = _mm256_set1_epi16(-1);
48513        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
48514        assert_eq_m256i(r, src);
48515        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
48516        #[rustfmt::skip]
48517        let e = _mm256_set_epi16(
48518            -1, -1, -1, -1,
48519            -1, -1, -1, -1,
48520            8, 9, 10, 11,
48521            12, 13, i16::MIN, i16::MAX,
48522        );
48523        assert_eq_m256i(r, e);
48524    }
48525
48526    #[simd_test(enable = "avx512f")]
48527    unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
48528        #[rustfmt::skip]
48529        let a = _mm512_set_epi32(
48530            0, 1, 2, 3,
48531            4, 5, 6, 7,
48532            8, 9, 10, 11,
48533            12, 13, i32::MIN, i32::MAX,
48534        );
48535        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
48536        assert_eq_m256i(r, _mm256_setzero_si256());
48537        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
48538        #[rustfmt::skip]
48539        let e = _mm256_set_epi16(
48540            0, 0, 0, 0,
48541            0, 0, 0, 0,
48542            8, 9, 10, 11,
48543            12, 13, i16::MIN, i16::MAX,
48544        );
48545        assert_eq_m256i(r, e);
48546    }
48547
48548    #[simd_test(enable = "avx512f,avx512vl")]
48549    unsafe fn test_mm256_cvtsepi32_epi16() {
48550        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48551        let r = _mm256_cvtsepi32_epi16(a);
48552        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48553        assert_eq_m128i(r, e);
48554    }
48555
48556    #[simd_test(enable = "avx512f,avx512vl")]
48557    unsafe fn test_mm256_mask_cvtsepi32_epi16() {
48558        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48559        let src = _mm_set1_epi16(-1);
48560        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
48561        assert_eq_m128i(r, src);
48562        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
48563        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48564        assert_eq_m128i(r, e);
48565    }
48566
48567    #[simd_test(enable = "avx512f,avx512vl")]
48568    unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
48569        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48570        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
48571        assert_eq_m128i(r, _mm_setzero_si128());
48572        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
48573        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48574        assert_eq_m128i(r, e);
48575    }
48576
48577    #[simd_test(enable = "avx512f,avx512vl")]
48578    unsafe fn test_mm_cvtsepi32_epi16() {
48579        let a = _mm_set_epi32(4, 5, 6, 7);
48580        let r = _mm_cvtsepi32_epi16(a);
48581        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48582        assert_eq_m128i(r, e);
48583    }
48584
48585    #[simd_test(enable = "avx512f,avx512vl")]
48586    unsafe fn test_mm_mask_cvtsepi32_epi16() {
48587        let a = _mm_set_epi32(4, 5, 6, 7);
48588        let src = _mm_set1_epi16(0);
48589        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
48590        assert_eq_m128i(r, src);
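        // Only the low four mask bits matter for a four-lane source; the higher bits are ignored.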
48591        let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
48592        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48593        assert_eq_m128i(r, e);
48594    }
48595
48596    #[simd_test(enable = "avx512f,avx512vl")]
48597    unsafe fn test_mm_maskz_cvtsepi32_epi16() {
48598        let a = _mm_set_epi32(4, 5, 6, 7);
48599        let r = _mm_maskz_cvtsepi32_epi16(0, a);
48600        assert_eq_m128i(r, _mm_setzero_si128());
48601        let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
48602        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48603        assert_eq_m128i(r, e);
48604    }
48605
48606    #[simd_test(enable = "avx512f")]
48607    unsafe fn test_mm512_cvtsepi32_epi8() {
48608        #[rustfmt::skip]
48609        let a = _mm512_set_epi32(
48610            0, 1, 2, 3,
48611            4, 5, 6, 7,
48612            8, 9, 10, 11,
48613            12, 13, i32::MIN, i32::MAX,
48614        );
48615        let r = _mm512_cvtsepi32_epi8(a);
48616        #[rustfmt::skip]
48617        let e = _mm_set_epi8(
48618            0, 1, 2, 3,
48619            4, 5, 6, 7,
48620            8, 9, 10, 11,
48621            12, 13, i8::MIN, i8::MAX,
48622        );
48623        assert_eq_m128i(r, e);
48624    }
48625
48626    #[simd_test(enable = "avx512f")]
48627    unsafe fn test_mm512_mask_cvtsepi32_epi8() {
48628        #[rustfmt::skip]
48629        let a = _mm512_set_epi32(
48630            0, 1, 2, 3,
48631            4, 5, 6, 7,
48632            8, 9, 10, 11,
48633            12, 13, i32::MIN, i32::MAX,
48634        );
48635        let src = _mm_set1_epi8(-1);
48636        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
48637        assert_eq_m128i(r, src);
48638        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
48639        #[rustfmt::skip]
48640        let e = _mm_set_epi8(
48641            -1, -1, -1, -1,
48642            -1, -1, -1, -1,
48643            8, 9, 10, 11,
48644            12, 13, i8::MIN, i8::MAX,
48645        );
48646        assert_eq_m128i(r, e);
48647    }
48648
48649    #[simd_test(enable = "avx512f")]
48650    unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
48651        #[rustfmt::skip]
48652        let a = _mm512_set_epi32(
48653            0, 1, 2, 3,
48654            4, 5, 6, 7,
48655            8, 9, 10, 11,
48656            12, 13, i32::MIN, i32::MAX,
48657        );
48658        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
48659        assert_eq_m128i(r, _mm_setzero_si128());
48660        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
48661        #[rustfmt::skip]
48662        let e = _mm_set_epi8(
48663            0, 0, 0, 0,
48664            0, 0, 0, 0,
48665            8, 9, 10, 11,
48666            12, 13, i8::MIN, i8::MAX,
48667        );
48668        assert_eq_m128i(r, e);
48669    }
48670
48671    #[simd_test(enable = "avx512f,avx512vl")]
48672    unsafe fn test_mm256_cvtsepi32_epi8() {
48673        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48674        let r = _mm256_cvtsepi32_epi8(a);
48675        #[rustfmt::skip]
48676        let e = _mm_set_epi8(
48677            0, 0, 0, 0,
48678            0, 0, 0, 0,
48679            9, 10, 11, 12,
48680            13, 14, 15, 16,
48681        );
48682        assert_eq_m128i(r, e);
48683    }
48684
48685    #[simd_test(enable = "avx512f,avx512vl")]
48686    unsafe fn test_mm256_mask_cvtsepi32_epi8() {
48687        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48688        let src = _mm_set1_epi8(0);
48689        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
48690        assert_eq_m128i(r, src);
48691        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
48692        #[rustfmt::skip]
48693        let e = _mm_set_epi8(
48694            0, 0, 0, 0,
48695            0, 0, 0, 0,
48696            9, 10, 11, 12,
48697            13, 14, 15, 16,
48698        );
48699        assert_eq_m128i(r, e);
48700    }
48701
48702    #[simd_test(enable = "avx512f,avx512vl")]
48703    unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
48704        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48705        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
48706        assert_eq_m128i(r, _mm_setzero_si128());
48707        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
48708        #[rustfmt::skip]
48709        let e = _mm_set_epi8(
48710            0, 0, 0, 0,
48711            0, 0, 0, 0,
48712            9, 10, 11, 12,
48713            13, 14, 15, 16,
48714        );
48715        assert_eq_m128i(r, e);
48716    }
48717
48718    #[simd_test(enable = "avx512f,avx512vl")]
48719    unsafe fn test_mm_cvtsepi32_epi8() {
48720        let a = _mm_set_epi32(13, 14, 15, 16);
48721        let r = _mm_cvtsepi32_epi8(a);
48722        #[rustfmt::skip]
48723        let e = _mm_set_epi8(
48724            0, 0, 0, 0,
48725            0, 0, 0, 0,
48726            0, 0, 0, 0,
48727            13, 14, 15, 16,
48728        );
48729        assert_eq_m128i(r, e);
48730    }
48731
48732    #[simd_test(enable = "avx512f,avx512vl")]
48733    unsafe fn test_mm_mask_cvtsepi32_epi8() {
48734        let a = _mm_set_epi32(13, 14, 15, 16);
48735        let src = _mm_set1_epi8(0);
48736        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
48737        assert_eq_m128i(r, src);
48738        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
48739        #[rustfmt::skip]
48740        let e = _mm_set_epi8(
48741            0, 0, 0, 0,
48742            0, 0, 0, 0,
48743            0, 0, 0, 0,
48744            13, 14, 15, 16,
48745        );
48746        assert_eq_m128i(r, e);
48747    }
48748
48749    #[simd_test(enable = "avx512f,avx512vl")]
48750    unsafe fn test_mm_maskz_cvtsepi32_epi8() {
48751        let a = _mm_set_epi32(13, 14, 15, 16);
48752        let r = _mm_maskz_cvtsepi32_epi8(0, a);
48753        assert_eq_m128i(r, _mm_setzero_si128());
48754        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
48755        #[rustfmt::skip]
48756        let e = _mm_set_epi8(
48757            0, 0, 0, 0,
48758            0, 0, 0, 0,
48759            0, 0, 0, 0,
48760            13, 14, 15, 16,
48761        );
48762        assert_eq_m128i(r, e);
48763    }
48764
48765    #[simd_test(enable = "avx512f")]
48766    unsafe fn test_mm512_cvtusepi32_epi16() {
48767        #[rustfmt::skip]
48768        let a = _mm512_set_epi32(
48769            0, 1, 2, 3,
48770            4, 5, 6, 7,
48771            8, 9, 10, 11,
48772            12, 13, i32::MIN, i32::MIN,
48773        );
48774        let r = _mm512_cvtusepi32_epi16(a);
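        // Unsigned saturation treats i32::MIN (0x8000_0000) as a large unsigned value and
        // clamps it to u16::MAX, which reads back as -1 through the signed `set_epi16` helper.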
48775        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48776        assert_eq_m256i(r, e);
48777    }
48778
48779    #[simd_test(enable = "avx512f")]
48780    unsafe fn test_mm512_mask_cvtusepi32_epi16() {
48781        #[rustfmt::skip]
48782        let a = _mm512_set_epi32(
48783            0, 1, 2, 3,
48784            4, 5, 6, 7,
48785            8, 9, 10, 11,
48786            12, 13, i32::MIN, i32::MIN,
48787        );
48788        let src = _mm256_set1_epi16(-1);
48789        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
48790        assert_eq_m256i(r, src);
48791        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
48792        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48793        assert_eq_m256i(r, e);
48794    }
48795
48796    #[simd_test(enable = "avx512f")]
48797    unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
48798        #[rustfmt::skip]
48799        let a = _mm512_set_epi32(
48800            0, 1, 2, 3,
48801            4, 5, 6, 7,
48802            8, 9, 10, 11,
48803            12, 13, i32::MIN, i32::MIN,
48804        );
48805        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
48806        assert_eq_m256i(r, _mm256_setzero_si256());
48807        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
48808        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48809        assert_eq_m256i(r, e);
48810    }
48811
48812    #[simd_test(enable = "avx512f,avx512vl")]
48813    unsafe fn test_mm256_cvtusepi32_epi16() {
48814        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48815        let r = _mm256_cvtusepi32_epi16(a);
48816        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48817        assert_eq_m128i(r, e);
48818    }
48819
48820    #[simd_test(enable = "avx512f,avx512vl")]
48821    unsafe fn test_mm256_mask_cvtusepi32_epi16() {
48822        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48823        let src = _mm_set1_epi16(0);
48824        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
48825        assert_eq_m128i(r, src);
48826        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
48827        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48828        assert_eq_m128i(r, e);
48829    }
48830
48831    #[simd_test(enable = "avx512f,avx512vl")]
48832    unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
48833        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48834        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
48835        assert_eq_m128i(r, _mm_setzero_si128());
48836        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
48837        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48838        assert_eq_m128i(r, e);
48839    }
48840
48841    #[simd_test(enable = "avx512f,avx512vl")]
48842    unsafe fn test_mm_cvtusepi32_epi16() {
48843        let a = _mm_set_epi32(5, 6, 7, 8);
48844        let r = _mm_cvtusepi32_epi16(a);
48845        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48846        assert_eq_m128i(r, e);
48847    }
48848
48849    #[simd_test(enable = "avx512f,avx512vl")]
48850    unsafe fn test_mm_mask_cvtusepi32_epi16() {
48851        let a = _mm_set_epi32(5, 6, 7, 8);
48852        let src = _mm_set1_epi16(0);
48853        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
48854        assert_eq_m128i(r, src);
48855        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
48856        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48857        assert_eq_m128i(r, e);
48858    }
48859
48860    #[simd_test(enable = "avx512f,avx512vl")]
48861    unsafe fn test_mm_maskz_cvtusepi32_epi16() {
48862        let a = _mm_set_epi32(5, 6, 7, 8);
48863        let r = _mm_maskz_cvtusepi32_epi16(0, a);
48864        assert_eq_m128i(r, _mm_setzero_si128());
48865        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
48866        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48867        assert_eq_m128i(r, e);
48868    }
48869
48870    #[simd_test(enable = "avx512f")]
48871    unsafe fn test_mm512_cvtusepi32_epi8() {
48872        #[rustfmt::skip]
48873        let a = _mm512_set_epi32(
48874            0, 1, 2, 3,
48875            4, 5, 6, 7,
48876            8, 9, 10, 11,
48877            12, 13, i32::MIN, i32::MIN,
48878        );
48879        let r = _mm512_cvtusepi32_epi8(a);
48880        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48881        assert_eq_m128i(r, e);
48882    }
48883
48884    #[simd_test(enable = "avx512f")]
48885    unsafe fn test_mm512_mask_cvtusepi32_epi8() {
48886        #[rustfmt::skip]
48887        let a = _mm512_set_epi32(
48888            0, 1, 2, 3,
48889            4, 5, 6, 7,
48890            8, 9, 10, 11,
48891            12, 13, i32::MIN, i32::MIN,
48892        );
48893        let src = _mm_set1_epi8(-1);
48894        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
48895        assert_eq_m128i(r, src);
48896        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
48897        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48898        assert_eq_m128i(r, e);
48899    }
48900
48901    #[simd_test(enable = "avx512f")]
48902    unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
48903        #[rustfmt::skip]
48904        let a = _mm512_set_epi32(
48905            0, 1, 2, 3,
48906            4, 5, 6, 7,
48907            8, 9, 10, 11,
48908            12, 13, i32::MIN, i32::MIN,
48909        );
48910        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
48911        assert_eq_m128i(r, _mm_setzero_si128());
48912        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
48913        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48914        assert_eq_m128i(r, e);
48915    }
48916
48917    #[simd_test(enable = "avx512f,avx512vl")]
48918    unsafe fn test_mm256_cvtusepi32_epi8() {
48919        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48920        let r = _mm256_cvtusepi32_epi8(a);
48921        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48922        assert_eq_m128i(r, e);
48923    }
48924
48925    #[simd_test(enable = "avx512f,avx512vl")]
48926    unsafe fn test_mm256_mask_cvtusepi32_epi8() {
48927        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48928        let src = _mm_set1_epi8(0);
48929        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
48930        assert_eq_m128i(r, src);
48931        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
48932        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48933        assert_eq_m128i(r, e);
48934    }
48935
48936    #[simd_test(enable = "avx512f,avx512vl")]
48937    unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
48938        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48939        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
48940        assert_eq_m128i(r, _mm_setzero_si128());
48941        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
48942        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48943        assert_eq_m128i(r, e);
48944    }
48945
48946    #[simd_test(enable = "avx512f,avx512vl")]
48947    unsafe fn test_mm_cvtusepi32_epi8() {
48948        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48949        let r = _mm_cvtusepi32_epi8(a);
48950        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48951        assert_eq_m128i(r, e);
48952    }
48953
48954    #[simd_test(enable = "avx512f,avx512vl")]
48955    unsafe fn test_mm_mask_cvtusepi32_epi8() {
48956        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48957        let src = _mm_set1_epi8(0);
48958        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
48959        assert_eq_m128i(r, src);
48960        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
48961        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48962        assert_eq_m128i(r, e);
48963    }
48964
48965    #[simd_test(enable = "avx512f,avx512vl")]
48966    unsafe fn test_mm_maskz_cvtusepi32_epi8() {
48967        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48968        let r = _mm_maskz_cvtusepi32_epi8(0, a);
48969        assert_eq_m128i(r, _mm_setzero_si128());
48970        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
48971        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48972        assert_eq_m128i(r, e);
48973    }
48974
48975    #[simd_test(enable = "avx512f")]
48976    unsafe fn test_mm512_cvt_roundps_epi32() {
48977        let a = _mm512_setr_ps(
48978            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48979        );
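        // _MM_FROUND_TO_NEAREST_INT rounds halfway cases to even (-1.5 -> -2, 9.5 -> 10),
        // while _MM_FROUND_TO_NEG_INF rounds toward negative infinity (9.5 -> 9).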
48980        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48981        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48982        assert_eq_m512i(r, e);
48983        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48984        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
48985        assert_eq_m512i(r, e);
48986    }
48987
48988    #[simd_test(enable = "avx512f")]
48989    unsafe fn test_mm512_mask_cvt_roundps_epi32() {
48990        let a = _mm512_setr_ps(
48991            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48992        );
48993        let src = _mm512_set1_epi32(0);
48994        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48995            src, 0, a,
48996        );
48997        assert_eq_m512i(r, src);
48998        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48999            src,
49000            0b00000000_11111111,
49001            a,
49002        );
49003        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
49004        assert_eq_m512i(r, e);
49005    }
49006
49007    #[simd_test(enable = "avx512f")]
49008    unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
49009        let a = _mm512_setr_ps(
49010            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49011        );
49012        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49013            0, a,
49014        );
49015        assert_eq_m512i(r, _mm512_setzero_si512());
49016        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49017            0b00000000_11111111,
49018            a,
49019        );
49020        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
49021        assert_eq_m512i(r, e);
49022    }
49023
49024    #[simd_test(enable = "avx512f")]
49025    unsafe fn test_mm512_cvt_roundps_epu32() {
49026        let a = _mm512_setr_ps(
49027            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49028        );
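        // Negative inputs are out of range for the unsigned conversion, so those lanes
        // come back as all ones (0xFFFF_FFFF), which prints as -1 here.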
49029        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49030        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
49031        assert_eq_m512i(r, e);
49032        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
49033        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49034        assert_eq_m512i(r, e);
49035    }
49036
49037    #[simd_test(enable = "avx512f")]
49038    unsafe fn test_mm512_mask_cvt_roundps_epu32() {
49039        let a = _mm512_setr_ps(
49040            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49041        );
49042        let src = _mm512_set1_epi32(0);
49043        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49044            src, 0, a,
49045        );
49046        assert_eq_m512i(r, src);
49047        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49048            src,
49049            0b00000000_11111111,
49050            a,
49051        );
49052        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49053        assert_eq_m512i(r, e);
49054    }
49055
49056    #[simd_test(enable = "avx512f")]
49057    unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
49058        let a = _mm512_setr_ps(
49059            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49060        );
49061        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49062            0, a,
49063        );
49064        assert_eq_m512i(r, _mm512_setzero_si512());
49065        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49066            0b00000000_11111111,
49067            a,
49068        );
49069        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49070        assert_eq_m512i(r, e);
49071    }
49072
49073    #[simd_test(enable = "avx512f")]
49074    unsafe fn test_mm512_cvt_roundepi32_ps() {
49075        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49076        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49077        let e = _mm512_setr_ps(
49078            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
49079        );
49080        assert_eq_m512(r, e);
49081    }
49082
49083    #[simd_test(enable = "avx512f")]
49084    unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
49085        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49086        let src = _mm512_set1_ps(0.);
49087        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49088            src, 0, a,
49089        );
49090        assert_eq_m512(r, src);
49091        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49092            src,
49093            0b00000000_11111111,
49094            a,
49095        );
49096        let e = _mm512_setr_ps(
49097            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49098        );
49099        assert_eq_m512(r, e);
49100    }
49101
49102    #[simd_test(enable = "avx512f")]
49103    unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
49104        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49105        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49106            0, a,
49107        );
49108        assert_eq_m512(r, _mm512_setzero_ps());
49109        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49110            0b00000000_11111111,
49111            a,
49112        );
49113        let e = _mm512_setr_ps(
49114            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49115        );
49116        assert_eq_m512(r, e);
49117    }
49118
49119    #[simd_test(enable = "avx512f")]
49120    unsafe fn test_mm512_cvt_roundepu32_ps() {
49121        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49122        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
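        // Lanes such as -2 are reinterpreted as large u32 values (4_294_967_294), whose
        // nearest f32 is 4_294_967_296.0; the literal 4294967300. denotes that same f32 value.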
49123        #[rustfmt::skip]
49124        let e = _mm512_setr_ps(
49125            0., 4294967300., 2., 4294967300.,
49126            4., 4294967300., 6., 4294967300.,
49127            8., 10., 10., 12.,
49128            12., 14., 14., 16.,
49129        );
49130        assert_eq_m512(r, e);
49131    }
49132
49133    #[simd_test(enable = "avx512f")]
49134    unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
49135        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49136        let src = _mm512_set1_ps(0.);
49137        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49138            src, 0, a,
49139        );
49140        assert_eq_m512(r, src);
49141        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49142            src,
49143            0b00000000_11111111,
49144            a,
49145        );
49146        #[rustfmt::skip]
49147        let e = _mm512_setr_ps(
49148            0., 4294967300., 2., 4294967300.,
49149            4., 4294967300., 6., 4294967300.,
49150            0., 0., 0., 0.,
49151            0., 0., 0., 0.,
49152        );
49153        assert_eq_m512(r, e);
49154    }
49155
49156    #[simd_test(enable = "avx512f")]
49157    unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
49158        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49159        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49160            0, a,
49161        );
49162        assert_eq_m512(r, _mm512_setzero_ps());
49163        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49164            0b00000000_11111111,
49165            a,
49166        );
49167        #[rustfmt::skip]
49168        let e = _mm512_setr_ps(
49169            0., 4294967300., 2., 4294967300.,
49170            4., 4294967300., 6., 4294967300.,
49171            0., 0., 0., 0.,
49172            0., 0., 0., 0.,
49173        );
49174        assert_eq_m512(r, e);
49175    }
49176
49177    #[simd_test(enable = "avx512f")]
49178    unsafe fn test_mm512_cvt_roundps_ph() {
49179        let a = _mm512_set1_ps(1.);
49180        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
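        // 4323521613979991040 is 0x3C00_3C00_3C00_3C00, i.e. four f16 lanes each holding 1.0.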
49181        let e = _mm256_setr_epi64x(
49182            4323521613979991040,
49183            4323521613979991040,
49184            4323521613979991040,
49185            4323521613979991040,
49186        );
49187        assert_eq_m256i(r, e);
49188    }
49189
49190    #[simd_test(enable = "avx512f")]
49191    unsafe fn test_mm512_mask_cvt_roundps_ph() {
49192        let a = _mm512_set1_ps(1.);
49193        let src = _mm256_set1_epi16(0);
49194        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49195        assert_eq_m256i(r, src);
49196        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49197        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49198        assert_eq_m256i(r, e);
49199    }
49200
49201    #[simd_test(enable = "avx512f")]
49202    unsafe fn test_mm512_maskz_cvt_roundps_ph() {
49203        let a = _mm512_set1_ps(1.);
49204        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49205        assert_eq_m256i(r, _mm256_setzero_si256());
49206        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49207        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49208        assert_eq_m256i(r, e);
49209    }
49210
49211    #[simd_test(enable = "avx512f,avx512vl")]
49212    unsafe fn test_mm256_mask_cvt_roundps_ph() {
49213        let a = _mm256_set1_ps(1.);
49214        let src = _mm_set1_epi16(0);
49215        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49216        assert_eq_m128i(r, src);
49217        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49218        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49219        assert_eq_m128i(r, e);
49220    }
49221
49222    #[simd_test(enable = "avx512f,avx512vl")]
49223    unsafe fn test_mm256_maskz_cvt_roundps_ph() {
49224        let a = _mm256_set1_ps(1.);
49225        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49226        assert_eq_m128i(r, _mm_setzero_si128());
49227        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49228        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49229        assert_eq_m128i(r, e);
49230    }
49231
49232    #[simd_test(enable = "avx512f,avx512vl")]
49233    unsafe fn test_mm_mask_cvt_roundps_ph() {
49234        let a = _mm_set1_ps(1.);
49235        let src = _mm_set1_epi16(0);
49236        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49237        assert_eq_m128i(r, src);
49238        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49239        let e = _mm_setr_epi64x(4323521613979991040, 0);
49240        assert_eq_m128i(r, e);
49241    }
49242
49243    #[simd_test(enable = "avx512f,avx512vl")]
49244    unsafe fn test_mm_maskz_cvt_roundps_ph() {
49245        let a = _mm_set1_ps(1.);
49246        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49247        assert_eq_m128i(r, _mm_setzero_si128());
49248        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49249        let e = _mm_setr_epi64x(4323521613979991040, 0);
49250        assert_eq_m128i(r, e);
49251    }
49252
49253    #[simd_test(enable = "avx512f")]
49254    unsafe fn test_mm512_cvtps_ph() {
49255        let a = _mm512_set1_ps(1.);
49256        let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
49257        let e = _mm256_setr_epi64x(
49258            4323521613979991040,
49259            4323521613979991040,
49260            4323521613979991040,
49261            4323521613979991040,
49262        );
49263        assert_eq_m256i(r, e);
49264    }
49265
49266    #[simd_test(enable = "avx512f")]
49267    unsafe fn test_mm512_mask_cvtps_ph() {
49268        let a = _mm512_set1_ps(1.);
49269        let src = _mm256_set1_epi16(0);
49270        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49271        assert_eq_m256i(r, src);
49272        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49273        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49274        assert_eq_m256i(r, e);
49275    }
49276
49277    #[simd_test(enable = "avx512f")]
49278    unsafe fn test_mm512_maskz_cvtps_ph() {
49279        let a = _mm512_set1_ps(1.);
49280        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49281        assert_eq_m256i(r, _mm256_setzero_si256());
49282        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49283        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49284        assert_eq_m256i(r, e);
49285    }
49286
49287    #[simd_test(enable = "avx512f,avx512vl")]
49288    unsafe fn test_mm256_mask_cvtps_ph() {
49289        let a = _mm256_set1_ps(1.);
49290        let src = _mm_set1_epi16(0);
49291        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49292        assert_eq_m128i(r, src);
49293        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49294        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49295        assert_eq_m128i(r, e);
49296    }
49297
49298    #[simd_test(enable = "avx512f,avx512vl")]
49299    unsafe fn test_mm256_maskz_cvtps_ph() {
49300        let a = _mm256_set1_ps(1.);
49301        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49302        assert_eq_m128i(r, _mm_setzero_si128());
49303        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49304        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49305        assert_eq_m128i(r, e);
49306    }
49307
49308    #[simd_test(enable = "avx512f,avx512vl")]
49309    unsafe fn test_mm_mask_cvtps_ph() {
49310        let a = _mm_set1_ps(1.);
49311        let src = _mm_set1_epi16(0);
49312        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49313        assert_eq_m128i(r, src);
49314        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49315        let e = _mm_setr_epi64x(4323521613979991040, 0);
49316        assert_eq_m128i(r, e);
49317    }
49318
49319    #[simd_test(enable = "avx512f,avx512vl")]
49320    unsafe fn test_mm_maskz_cvtps_ph() {
49321        let a = _mm_set1_ps(1.);
49322        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49323        assert_eq_m128i(r, _mm_setzero_si128());
49324        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49325        let e = _mm_setr_epi64x(4323521613979991040, 0);
49326        assert_eq_m128i(r, e);
49327    }
49328
49329    #[simd_test(enable = "avx512f")]
49330    unsafe fn test_mm512_cvt_roundph_ps() {
49331        let a = _mm256_setr_epi64x(
49332            4323521613979991040,
49333            4323521613979991040,
49334            4323521613979991040,
49335            4323521613979991040,
49336        );
49337        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
49338        let e = _mm512_set1_ps(1.);
49339        assert_eq_m512(r, e);
49340    }
49341
49342    #[simd_test(enable = "avx512f")]
49343    unsafe fn test_mm512_mask_cvt_roundph_ps() {
49344        let a = _mm256_setr_epi64x(
49345            4323521613979991040,
49346            4323521613979991040,
49347            4323521613979991040,
49348            4323521613979991040,
49349        );
49350        let src = _mm512_set1_ps(0.);
49351        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
49352        assert_eq_m512(r, src);
49353        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49354        let e = _mm512_setr_ps(
49355            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49356        );
49357        assert_eq_m512(r, e);
49358    }
49359
49360    #[simd_test(enable = "avx512f")]
49361    unsafe fn test_mm512_maskz_cvt_roundph_ps() {
49362        let a = _mm256_setr_epi64x(
49363            4323521613979991040,
49364            4323521613979991040,
49365            4323521613979991040,
49366            4323521613979991040,
49367        );
49368        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
49369        assert_eq_m512(r, _mm512_setzero_ps());
49370        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49371        let e = _mm512_setr_ps(
49372            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49373        );
49374        assert_eq_m512(r, e);
49375    }
49376
49377    #[simd_test(enable = "avx512f")]
49378    unsafe fn test_mm512_cvtph_ps() {
49379        let a = _mm256_setr_epi64x(
49380            4323521613979991040,
49381            4323521613979991040,
49382            4323521613979991040,
49383            4323521613979991040,
49384        );
49385        let r = _mm512_cvtph_ps(a);
49386        let e = _mm512_set1_ps(1.);
49387        assert_eq_m512(r, e);
49388    }
49389
49390    #[simd_test(enable = "avx512f")]
49391    unsafe fn test_mm512_mask_cvtph_ps() {
49392        let a = _mm256_setr_epi64x(
49393            4323521613979991040,
49394            4323521613979991040,
49395            4323521613979991040,
49396            4323521613979991040,
49397        );
49398        let src = _mm512_set1_ps(0.);
49399        let r = _mm512_mask_cvtph_ps(src, 0, a);
49400        assert_eq_m512(r, src);
49401        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
49402        let e = _mm512_setr_ps(
49403            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49404        );
49405        assert_eq_m512(r, e);
49406    }
49407
49408    #[simd_test(enable = "avx512f")]
49409    unsafe fn test_mm512_maskz_cvtph_ps() {
49410        let a = _mm256_setr_epi64x(
49411            4323521613979991040,
49412            4323521613979991040,
49413            4323521613979991040,
49414            4323521613979991040,
49415        );
49416        let r = _mm512_maskz_cvtph_ps(0, a);
49417        assert_eq_m512(r, _mm512_setzero_ps());
49418        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
49419        let e = _mm512_setr_ps(
49420            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49421        );
49422        assert_eq_m512(r, e);
49423    }
49424
49425    #[simd_test(enable = "avx512f,avx512vl")]
49426    unsafe fn test_mm256_mask_cvtph_ps() {
49427        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49428        let src = _mm256_set1_ps(0.);
49429        let r = _mm256_mask_cvtph_ps(src, 0, a);
49430        assert_eq_m256(r, src);
49431        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
49432        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49433        assert_eq_m256(r, e);
49434    }
49435
49436    #[simd_test(enable = "avx512f,avx512vl")]
49437    unsafe fn test_mm256_maskz_cvtph_ps() {
49438        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49439        let r = _mm256_maskz_cvtph_ps(0, a);
49440        assert_eq_m256(r, _mm256_setzero_ps());
49441        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
49442        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49443        assert_eq_m256(r, e);
49444    }
49445
49446    #[simd_test(enable = "avx512f,avx512vl")]
49447    unsafe fn test_mm_mask_cvtph_ps() {
49448        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49449        let src = _mm_set1_ps(0.);
49450        let r = _mm_mask_cvtph_ps(src, 0, a);
49451        assert_eq_m128(r, src);
49452        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
49453        let e = _mm_setr_ps(1., 1., 1., 1.);
49454        assert_eq_m128(r, e);
49455    }
49456
49457    #[simd_test(enable = "avx512f,avx512vl")]
49458    unsafe fn test_mm_maskz_cvtph_ps() {
49459        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49460        let r = _mm_maskz_cvtph_ps(0, a);
49461        assert_eq_m128(r, _mm_setzero_ps());
49462        let r = _mm_maskz_cvtph_ps(0b00001111, a);
49463        let e = _mm_setr_ps(1., 1., 1., 1.);
49464        assert_eq_m128(r, e);
49465    }
49466
49467    #[simd_test(enable = "avx512f")]
49468    unsafe fn test_mm512_cvtt_roundps_epi32() {
49469        let a = _mm512_setr_ps(
49470            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49471        );
49472        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
49473        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49474        assert_eq_m512i(r, e);
49475    }
49476
49477    #[simd_test(enable = "avx512f")]
49478    unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
49479        let a = _mm512_setr_ps(
49480            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49481        );
49482        let src = _mm512_set1_epi32(0);
49483        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
49484        assert_eq_m512i(r, src);
49485        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49486        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49487        assert_eq_m512i(r, e);
49488    }
49489
49490    #[simd_test(enable = "avx512f")]
49491    unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
49492        let a = _mm512_setr_ps(
49493            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49494        );
49495        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
49496        assert_eq_m512i(r, _mm512_setzero_si512());
49497        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49498        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49499        assert_eq_m512i(r, e);
49500    }
49501
49502    #[simd_test(enable = "avx512f")]
49503    unsafe fn test_mm512_cvtt_roundps_epu32() {
49504        let a = _mm512_setr_ps(
49505            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49506        );
49507        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
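        // negative inputs are out of range for an unsigned conversion and produce u32::MAX (-1 when viewed as i32)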
49508        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49509        assert_eq_m512i(r, e);
49510    }
49511
49512    #[simd_test(enable = "avx512f")]
49513    unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
49514        let a = _mm512_setr_ps(
49515            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49516        );
49517        let src = _mm512_set1_epi32(0);
49518        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
49519        assert_eq_m512i(r, src);
49520        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49521        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49522        assert_eq_m512i(r, e);
49523    }
49524
49525    #[simd_test(enable = "avx512f")]
49526    unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
49527        let a = _mm512_setr_ps(
49528            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49529        );
49530        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
49531        assert_eq_m512i(r, _mm512_setzero_si512());
49532        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49533        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49534        assert_eq_m512i(r, e);
49535    }
49536
49537    #[simd_test(enable = "avx512f")]
49538    unsafe fn test_mm512_cvttps_epi32() {
49539        let a = _mm512_setr_ps(
49540            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49541        );
49542        let r = _mm512_cvttps_epi32(a);
49543        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49544        assert_eq_m512i(r, e);
49545    }
49546
49547    #[simd_test(enable = "avx512f")]
49548    unsafe fn test_mm512_mask_cvttps_epi32() {
49549        let a = _mm512_setr_ps(
49550            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49551        );
49552        let src = _mm512_set1_epi32(0);
49553        let r = _mm512_mask_cvttps_epi32(src, 0, a);
49554        assert_eq_m512i(r, src);
49555        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
49556        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49557        assert_eq_m512i(r, e);
49558    }
49559
49560    #[simd_test(enable = "avx512f")]
49561    unsafe fn test_mm512_maskz_cvttps_epi32() {
49562        let a = _mm512_setr_ps(
49563            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49564        );
49565        let r = _mm512_maskz_cvttps_epi32(0, a);
49566        assert_eq_m512i(r, _mm512_setzero_si512());
49567        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
49568        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49569        assert_eq_m512i(r, e);
49570    }
49571
49572    #[simd_test(enable = "avx512f,avx512vl")]
49573    unsafe fn test_mm256_mask_cvttps_epi32() {
49574        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49575        let src = _mm256_set1_epi32(0);
49576        let r = _mm256_mask_cvttps_epi32(src, 0, a);
49577        assert_eq_m256i(r, src);
49578        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
49579        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49580        assert_eq_m256i(r, e);
49581    }
49582
49583    #[simd_test(enable = "avx512f,avx512vl")]
49584    unsafe fn test_mm256_maskz_cvttps_epi32() {
49585        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49586        let r = _mm256_maskz_cvttps_epi32(0, a);
49587        assert_eq_m256i(r, _mm256_setzero_si256());
49588        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
49589        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49590        assert_eq_m256i(r, e);
49591    }
49592
49593    #[simd_test(enable = "avx512f,avx512vl")]
49594    unsafe fn test_mm_mask_cvttps_epi32() {
49595        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49596        let src = _mm_set1_epi32(0);
49597        let r = _mm_mask_cvttps_epi32(src, 0, a);
49598        assert_eq_m128i(r, src);
49599        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
49600        let e = _mm_set_epi32(12, 13, 14, 15);
49601        assert_eq_m128i(r, e);
49602    }
49603
49604    #[simd_test(enable = "avx512f,avx512vl")]
49605    unsafe fn test_mm_maskz_cvttps_epi32() {
49606        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49607        let r = _mm_maskz_cvttps_epi32(0, a);
49608        assert_eq_m128i(r, _mm_setzero_si128());
49609        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
49610        let e = _mm_set_epi32(12, 13, 14, 15);
49611        assert_eq_m128i(r, e);
49612    }
49613
49614    #[simd_test(enable = "avx512f")]
49615    unsafe fn test_mm512_cvttps_epu32() {
49616        let a = _mm512_setr_ps(
49617            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49618        );
49619        let r = _mm512_cvttps_epu32(a);
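        // out-of-range (negative) lanes convert to u32::MAX, shown as -1 below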
49620        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49621        assert_eq_m512i(r, e);
49622    }
49623
49624    #[simd_test(enable = "avx512f")]
49625    unsafe fn test_mm512_mask_cvttps_epu32() {
49626        let a = _mm512_setr_ps(
49627            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49628        );
49629        let src = _mm512_set1_epi32(0);
49630        let r = _mm512_mask_cvttps_epu32(src, 0, a);
49631        assert_eq_m512i(r, src);
49632        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
49633        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49634        assert_eq_m512i(r, e);
49635    }
49636
49637    #[simd_test(enable = "avx512f")]
49638    unsafe fn test_mm512_maskz_cvttps_epu32() {
49639        let a = _mm512_setr_ps(
49640            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49641        );
49642        let r = _mm512_maskz_cvttps_epu32(0, a);
49643        assert_eq_m512i(r, _mm512_setzero_si512());
49644        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
49645        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49646        assert_eq_m512i(r, e);
49647    }
49648
49649    #[simd_test(enable = "avx512f,avx512vl")]
49650    unsafe fn test_mm256_cvttps_epu32() {
49651        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49652        let r = _mm256_cvttps_epu32(a);
49653        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49654        assert_eq_m256i(r, e);
49655    }
49656
49657    #[simd_test(enable = "avx512f,avx512vl")]
49658    unsafe fn test_mm256_mask_cvttps_epu32() {
49659        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49660        let src = _mm256_set1_epi32(0);
49661        let r = _mm256_mask_cvttps_epu32(src, 0, a);
49662        assert_eq_m256i(r, src);
49663        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
49664        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49665        assert_eq_m256i(r, e);
49666    }
49667
49668    #[simd_test(enable = "avx512f,avx512vl")]
49669    unsafe fn test_mm256_maskz_cvttps_epu32() {
49670        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49671        let r = _mm256_maskz_cvttps_epu32(0, a);
49672        assert_eq_m256i(r, _mm256_setzero_si256());
49673        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
49674        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49675        assert_eq_m256i(r, e);
49676    }
49677
49678    #[simd_test(enable = "avx512f,avx512vl")]
49679    unsafe fn test_mm_cvttps_epu32() {
49680        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49681        let r = _mm_cvttps_epu32(a);
49682        let e = _mm_set_epi32(12, 13, 14, 15);
49683        assert_eq_m128i(r, e);
49684    }
49685
49686    #[simd_test(enable = "avx512f,avx512vl")]
49687    unsafe fn test_mm_mask_cvttps_epu32() {
49688        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49689        let src = _mm_set1_epi32(0);
49690        let r = _mm_mask_cvttps_epu32(src, 0, a);
49691        assert_eq_m128i(r, src);
49692        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
49693        let e = _mm_set_epi32(12, 13, 14, 15);
49694        assert_eq_m128i(r, e);
49695    }
49696
49697    #[simd_test(enable = "avx512f,avx512vl")]
49698    unsafe fn test_mm_maskz_cvttps_epu32() {
49699        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49700        let r = _mm_maskz_cvttps_epu32(0, a);
49701        assert_eq_m128i(r, _mm_setzero_si128());
49702        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
49703        let e = _mm_set_epi32(12, 13, 14, 15);
49704        assert_eq_m128i(r, e);
49705    }
49706
49707    #[simd_test(enable = "avx512f")]
49708    unsafe fn test_mm512_i32gather_ps() {
49709        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49710        // A scale of 4 addresses 32-bit (dword) elements
49711        #[rustfmt::skip]
49712        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49713                                      120, 128, 136, 144, 152, 160, 168, 176);
49714        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr());
49715        #[rustfmt::skip]
49716        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
49717                                         120., 128., 136., 144., 152., 160., 168., 176.));
49718    }
49719
49720    #[simd_test(enable = "avx512f")]
49721    unsafe fn test_mm512_mask_i32gather_ps() {
49722        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49723        let src = _mm512_set1_ps(2.);
49724        let mask = 0b10101010_10101010;
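        // bits 1, 3, 5, ... are set, so odd-numbered lanes are gathered and even lanes keep src (2.)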
49725        #[rustfmt::skip]
49726        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49727                                      120, 128, 136, 144, 152, 160, 168, 176);
49728        // A scale of 4 addresses 32-bit (dword) elements
49729        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr());
49730        #[rustfmt::skip]
49731        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
49732                                         2., 128., 2., 144., 2., 160., 2., 176.));
49733    }
49734
49735    #[simd_test(enable = "avx512f")]
49736    unsafe fn test_mm512_i32gather_epi32() {
49737        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49738        // A scale of 4 addresses 32-bit (dword) elements
49739        #[rustfmt::skip]
49740        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49741                                      120, 128, 136, 144, 152, 160, 168, 176);
49742        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr());
49743        #[rustfmt::skip]
49744        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49745                                             120, 128, 136, 144, 152, 160, 168, 176));
49746    }
49747
49748    #[simd_test(enable = "avx512f")]
49749    unsafe fn test_mm512_mask_i32gather_epi32() {
49750        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49751        let src = _mm512_set1_epi32(2);
49752        let mask = 0b10101010_10101010;
49753        let index = _mm512_setr_epi32(
49754            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
49755        );
49756        // A scale of 4 addresses 32-bit (dword) elements
49757        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr());
49758        assert_eq_m512i(
49759            r,
49760            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
49761        );
49762    }
49763
49764    #[simd_test(enable = "avx512f")]
49765    unsafe fn test_mm512_i32scatter_ps() {
49766        let mut arr = [0f32; 256];
49767        #[rustfmt::skip]
49768        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49769                                      128, 144, 160, 176, 192, 208, 224, 240);
49770        let src = _mm512_setr_ps(
49771            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49772        );
49773        // A scale of 4 addresses 32-bit (dword) elements
49774        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
49775        let mut expected = [0f32; 256];
49776        for i in 0..16 {
49777            expected[i * 16] = (i + 1) as f32;
49778        }
49779        assert_eq!(&arr[..], &expected[..]);
49780    }
49781
49782    #[simd_test(enable = "avx512f")]
49783    unsafe fn test_mm512_mask_i32scatter_ps() {
49784        let mut arr = [0f32; 256];
49785        let mask = 0b10101010_10101010;
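        // only the odd-numbered lanes are scattered; even lanes are skipped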
49786        #[rustfmt::skip]
49787        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49788                                      128, 144, 160, 176, 192, 208, 224, 240);
49789        let src = _mm512_setr_ps(
49790            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49791        );
49792        // A scale of 4 addresses 32-bit (dword) elements
49793        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
49794        let mut expected = [0f32; 256];
49795        for i in 0..8 {
49796            expected[i * 32 + 16] = 2. * (i + 1) as f32;
49797        }
49798        assert_eq!(&arr[..], &expected[..]);
49799    }
49800
49801    #[simd_test(enable = "avx512f")]
49802    unsafe fn test_mm512_i32scatter_epi32() {
49803        let mut arr = [0i32; 256];
49804        #[rustfmt::skip]
49806        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49807                                      128, 144, 160, 176, 192, 208, 224, 240);
49808        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49809        // A scale of 4 addresses 32-bit (dword) elements
49810        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
49811        let mut expected = [0i32; 256];
49812        for i in 0..16 {
49813            expected[i * 16] = (i + 1) as i32;
49814        }
49815        assert_eq!(&arr[..], &expected[..]);
49816    }
49817
49818    #[simd_test(enable = "avx512f")]
49819    unsafe fn test_mm512_mask_i32scatter_epi32() {
49820        let mut arr = [0i32; 256];
49821        let mask = 0b10101010_10101010;
49822        #[rustfmt::skip]
49823        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49824                                      128, 144, 160, 176, 192, 208, 224, 240);
49825        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49826        // A scale of 4 addresses 32-bit (dword) elements
49827        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
49828        let mut expected = [0i32; 256];
49829        for i in 0..8 {
49830            expected[i * 32 + 16] = 2 * (i + 1) as i32;
49831        }
49832        assert_eq!(&arr[..], &expected[..]);
49833    }
49834
49835    #[simd_test(enable = "avx512f")]
49836    unsafe fn test_mm512_cmplt_ps_mask() {
49837        #[rustfmt::skip]
49838        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49839                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49840        let b = _mm512_set1_ps(-1.);
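        // _mm512_set_ps lists elements from highest to lowest, so mask bit 0 corresponds to the last argument (-100.)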
49841        let m = _mm512_cmplt_ps_mask(a, b);
49842        assert_eq!(m, 0b00000101_00000101);
49843    }
49844
49845    #[simd_test(enable = "avx512f")]
49846    unsafe fn test_mm512_mask_cmplt_ps_mask() {
49847        #[rustfmt::skip]
49848        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49849                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49850        let b = _mm512_set1_ps(-1.);
49851        let mask = 0b01100110_01100110;
49852        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
49853        assert_eq!(r, 0b00000100_00000100);
49854    }
49855
49856    #[simd_test(enable = "avx512f")]
49857    unsafe fn test_mm512_cmpnlt_ps_mask() {
49858        #[rustfmt::skip]
49859        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49860                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49861        let b = _mm512_set1_ps(-1.);
49862        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
49863    }
49864
49865    #[simd_test(enable = "avx512f")]
49866    unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
49867        #[rustfmt::skip]
49868        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49869                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49870        let b = _mm512_set1_ps(-1.);
49871        let mask = 0b01111010_01111010;
49872        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
49873    }
49874
49875    #[simd_test(enable = "avx512f")]
49876    unsafe fn test_mm512_cmpnle_ps_mask() {
49877        #[rustfmt::skip]
49878        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49879                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49880        let b = _mm512_set1_ps(-1.);
49881        let m = _mm512_cmpnle_ps_mask(b, a);
49882        assert_eq!(m, 0b00001101_00001101);
49883    }
49884
49885    #[simd_test(enable = "avx512f")]
49886    unsafe fn test_mm512_mask_cmpnle_ps_mask() {
49887        #[rustfmt::skip]
49888        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49889                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49890        let b = _mm512_set1_ps(-1.);
49891        let mask = 0b01100110_01100110;
49892        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
49893        assert_eq!(r, 0b00000100_00000100);
49894    }
49895
49896    #[simd_test(enable = "avx512f")]
49897    unsafe fn test_mm512_cmple_ps_mask() {
49898        #[rustfmt::skip]
49899        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49900                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49901        let b = _mm512_set1_ps(-1.);
49902        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
49903    }
49904
49905    #[simd_test(enable = "avx512f")]
49906    unsafe fn test_mm512_mask_cmple_ps_mask() {
49907        #[rustfmt::skip]
49908        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49909                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49910        let b = _mm512_set1_ps(-1.);
49911        let mask = 0b01111010_01111010;
49912        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
49913    }
49914
49915    #[simd_test(enable = "avx512f")]
49916    unsafe fn test_mm512_cmpeq_ps_mask() {
49917        #[rustfmt::skip]
49918        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49919                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49920        #[rustfmt::skip]
49921        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49922                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49923        let m = _mm512_cmpeq_ps_mask(b, a);
49924        assert_eq!(m, 0b11001101_11001101);
49925    }
49926
49927    #[simd_test(enable = "avx512f")]
49928    unsafe fn test_mm512_mask_cmpeq_ps_mask() {
49929        #[rustfmt::skip]
49930        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49931                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49932        #[rustfmt::skip]
49933        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49934                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49935        let mask = 0b01111010_01111010;
49936        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
49937        assert_eq!(r, 0b01001000_01001000);
49938    }
49939
49940    #[simd_test(enable = "avx512f")]
49941    unsafe fn test_mm512_cmpneq_ps_mask() {
49942        #[rustfmt::skip]
49943        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49944                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49945        #[rustfmt::skip]
49946        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49947                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49948        let m = _mm512_cmpneq_ps_mask(b, a);
49949        assert_eq!(m, 0b00110010_00110010);
49950    }
49951
49952    #[simd_test(enable = "avx512f")]
49953    unsafe fn test_mm512_mask_cmpneq_ps_mask() {
49954        #[rustfmt::skip]
49955        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49956                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49957        #[rustfmt::skip]
49958        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49959                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49960        let mask = 0b01111010_01111010;
49961        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
49962        assert_eq!(r, 0b00110010_00110010)
49963    }
49964
49965    #[simd_test(enable = "avx512f")]
49966    unsafe fn test_mm512_cmp_ps_mask() {
49967        #[rustfmt::skip]
49968        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49969                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49970        let b = _mm512_set1_ps(-1.);
49971        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49972        assert_eq!(m, 0b00000101_00000101);
49973    }
49974
49975    #[simd_test(enable = "avx512f")]
49976    unsafe fn test_mm512_mask_cmp_ps_mask() {
49977        #[rustfmt::skip]
49978        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49979                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49980        let b = _mm512_set1_ps(-1.);
49981        let mask = 0b01100110_01100110;
49982        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49983        assert_eq!(r, 0b00000100_00000100);
49984    }
49985
49986    #[simd_test(enable = "avx512f,avx512vl")]
49987    unsafe fn test_mm256_cmp_ps_mask() {
49988        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49989        let b = _mm256_set1_ps(-1.);
49990        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49991        assert_eq!(m, 0b00000101);
49992    }
49993
49994    #[simd_test(enable = "avx512f,avx512vl")]
49995    unsafe fn test_mm256_mask_cmp_ps_mask() {
49996        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49997        let b = _mm256_set1_ps(-1.);
49998        let mask = 0b01100110;
49999        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
50000        assert_eq!(r, 0b00000100);
50001    }
50002
50003    #[simd_test(enable = "avx512f,avx512vl")]
50004    unsafe fn test_mm_cmp_ps_mask() {
50005        let a = _mm_set_ps(0., 1., -1., 13.);
50006        let b = _mm_set1_ps(1.);
50007        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
50008        assert_eq!(m, 0b00001010);
50009    }
50010
50011    #[simd_test(enable = "avx512f,avx512vl")]
50012    unsafe fn test_mm_mask_cmp_ps_mask() {
50013        let a = _mm_set_ps(0., 1., -1., 13.);
50014        let b = _mm_set1_ps(1.);
50015        let mask = 0b11111111;
50016        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
50017        assert_eq!(r, 0b00001010);
50018    }
50019
50020    #[simd_test(enable = "avx512f")]
50021    unsafe fn test_mm512_cmp_round_ps_mask() {
50022        #[rustfmt::skip]
50023        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
50024                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
50025        let b = _mm512_set1_ps(-1.);
50026        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
50027        assert_eq!(m, 0b00000101_00000101);
50028    }
50029
50030    #[simd_test(enable = "avx512f")]
50031    unsafe fn test_mm512_mask_cmp_round_ps_mask() {
50032        #[rustfmt::skip]
50033        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
50034                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
50035        let b = _mm512_set1_ps(-1.);
50036        let mask = 0b01100110_01100110;
50037        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
50038        assert_eq!(r, 0b00000100_00000100);
50039    }
50040
50041    #[simd_test(enable = "avx512f")]
50042    unsafe fn test_mm512_cmpord_ps_mask() {
50043        #[rustfmt::skip]
50044        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50045                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50046        #[rustfmt::skip]
50047        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50048                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50049        let m = _mm512_cmpord_ps_mask(a, b);
50050        assert_eq!(m, 0b00000101_00000101);
50051    }
50052
50053    #[simd_test(enable = "avx512f")]
50054    unsafe fn test_mm512_mask_cmpord_ps_mask() {
50055        #[rustfmt::skip]
50056        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50057                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50058        #[rustfmt::skip]
50059        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50060                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50061        let mask = 0b11000011_11000011;
50062        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
50063        assert_eq!(m, 0b00000001_00000001);
50064    }
50065
50066    #[simd_test(enable = "avx512f")]
50067    unsafe fn test_mm512_cmpunord_ps_mask() {
50068        #[rustfmt::skip]
50069        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50070                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50071        #[rustfmt::skip]
50072        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50073                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50074        let m = _mm512_cmpunord_ps_mask(a, b);
50075
50076        assert_eq!(m, 0b11111010_11111010);
50077    }
50078
50079    #[simd_test(enable = "avx512f")]
50080    unsafe fn test_mm512_mask_cmpunord_ps_mask() {
50081        #[rustfmt::skip]
50082        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50083                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50084        #[rustfmt::skip]
50085        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50086                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50087        let mask = 0b00001111_00001111;
50088        let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
50089        assert_eq!(m, 0b00001010_00001010);
50090    }
50091
50092    #[simd_test(enable = "avx512f")]
50093    unsafe fn test_mm_cmp_ss_mask() {
50094        let a = _mm_setr_ps(2., 1., 1., 1.);
50095        let b = _mm_setr_ps(1., 2., 2., 2.);
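        // only the lowest element takes part in an `_ss` comparison, so the resulting mask is either 0 or 1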
50096        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
50097        assert_eq!(m, 1);
50098    }
50099
50100    #[simd_test(enable = "avx512f")]
50101    unsafe fn test_mm_mask_cmp_ss_mask() {
50102        let a = _mm_setr_ps(2., 1., 1., 1.);
50103        let b = _mm_setr_ps(1., 2., 2., 2.);
50104        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
50105        assert_eq!(m, 0);
50106        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
50107        assert_eq!(m, 1);
50108    }
50109
50110    #[simd_test(enable = "avx512f")]
50111    unsafe fn test_mm_cmp_round_ss_mask() {
50112        let a = _mm_setr_ps(2., 1., 1., 1.);
50113        let b = _mm_setr_ps(1., 2., 2., 2.);
50114        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50115        assert_eq!(m, 1);
50116    }
50117
50118    #[simd_test(enable = "avx512f")]
50119    unsafe fn test_mm_mask_cmp_round_ss_mask() {
50120        let a = _mm_setr_ps(2., 1., 1., 1.);
50121        let b = _mm_setr_ps(1., 2., 2., 2.);
50122        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50123        assert_eq!(m, 0);
50124        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50125        assert_eq!(m, 1);
50126    }
50127
50128    #[simd_test(enable = "avx512f")]
50129    unsafe fn test_mm_cmp_sd_mask() {
50130        let a = _mm_setr_pd(2., 1.);
50131        let b = _mm_setr_pd(1., 2.);
50132        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
50133        assert_eq!(m, 1);
50134    }
50135
50136    #[simd_test(enable = "avx512f")]
50137    unsafe fn test_mm_mask_cmp_sd_mask() {
50138        let a = _mm_setr_pd(2., 1.);
50139        let b = _mm_setr_pd(1., 2.);
50140        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
50141        assert_eq!(m, 0);
50142        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
50143        assert_eq!(m, 1);
50144    }
50145
50146    #[simd_test(enable = "avx512f")]
50147    unsafe fn test_mm_cmp_round_sd_mask() {
50148        let a = _mm_setr_pd(2., 1.);
50149        let b = _mm_setr_pd(1., 2.);
50150        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50151        assert_eq!(m, 1);
50152    }
50153
50154    #[simd_test(enable = "avx512f")]
50155    unsafe fn test_mm_mask_cmp_round_sd_mask() {
50156        let a = _mm_setr_pd(2., 1.);
50157        let b = _mm_setr_pd(1., 2.);
50158        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50159        assert_eq!(m, 0);
50160        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50161        assert_eq!(m, 1);
50162    }
50163
50164    #[simd_test(enable = "avx512f")]
50165    unsafe fn test_mm512_cmplt_epu32_mask() {
50166        #[rustfmt::skip]
50167        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50168                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50169        let b = _mm512_set1_epi32(-1);
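        // -1 broadcasts to u32::MAX, so every lane except those equal to u32::MAX compares below b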
50170        let m = _mm512_cmplt_epu32_mask(a, b);
50171        assert_eq!(m, 0b11001111_11001111);
50172    }
50173
50174    #[simd_test(enable = "avx512f")]
50175    unsafe fn test_mm512_mask_cmplt_epu32_mask() {
50176        #[rustfmt::skip]
50177        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50178                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50179        let b = _mm512_set1_epi32(-1);
50180        let mask = 0b01111010_01111010;
50181        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
50182        assert_eq!(r, 0b01001010_01001010);
50183    }
50184
50185    #[simd_test(enable = "avx512f,avx512vl")]
50186    unsafe fn test_mm256_cmplt_epu32_mask() {
50187        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50188        let b = _mm256_set1_epi32(1);
50189        let r = _mm256_cmplt_epu32_mask(a, b);
50190        assert_eq!(r, 0b10000000);
50191    }
50192
50193    #[simd_test(enable = "avx512f,avx512vl")]
50194    unsafe fn test_mm256_mask_cmplt_epu32_mask() {
50195        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50196        let b = _mm256_set1_epi32(1);
50197        let mask = 0b11111111;
50198        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
50199        assert_eq!(r, 0b10000000);
50200    }
50201
50202    #[simd_test(enable = "avx512f,avx512vl")]
50203    unsafe fn test_mm_cmplt_epu32_mask() {
50204        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50205        let b = _mm_set1_epi32(1);
50206        let r = _mm_cmplt_epu32_mask(a, b);
50207        assert_eq!(r, 0b00001000);
50208    }
50209
50210    #[simd_test(enable = "avx512f,avx512vl")]
50211    unsafe fn test_mm_mask_cmplt_epu32_mask() {
50212        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50213        let b = _mm_set1_epi32(1);
50214        let mask = 0b11111111;
50215        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
50216        assert_eq!(r, 0b00001000);
50217    }
50218
50219    #[simd_test(enable = "avx512f")]
50220    unsafe fn test_mm512_cmpgt_epu32_mask() {
50221        #[rustfmt::skip]
50222        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50223                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50224        let b = _mm512_set1_epi32(-1);
50225        let m = _mm512_cmpgt_epu32_mask(b, a);
50226        assert_eq!(m, 0b11001111_11001111);
50227    }
50228
50229    #[simd_test(enable = "avx512f")]
50230    unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
50231        #[rustfmt::skip]
50232        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50233                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50234        let b = _mm512_set1_epi32(-1);
50235        let mask = 0b01111010_01111010;
50236        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
50237        assert_eq!(r, 0b01001010_01001010);
50238    }
50239
50240    #[simd_test(enable = "avx512f,avx512vl")]
50241    unsafe fn test_mm256_cmpgt_epu32_mask() {
50242        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50243        let b = _mm256_set1_epi32(1);
50244        let r = _mm256_cmpgt_epu32_mask(a, b);
50245        assert_eq!(r, 0b00111111);
50246    }
50247
50248    #[simd_test(enable = "avx512f,avx512vl")]
50249    unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
50250        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50251        let b = _mm256_set1_epi32(1);
50252        let mask = 0b11111111;
50253        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
50254        assert_eq!(r, 0b00111111);
50255    }
50256
50257    #[simd_test(enable = "avx512f,avx512vl")]
50258    unsafe fn test_mm_cmpgt_epu32_mask() {
50259        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50260        let b = _mm_set1_epi32(1);
50261        let r = _mm_cmpgt_epu32_mask(a, b);
50262        assert_eq!(r, 0b00000011);
50263    }
50264
50265    #[simd_test(enable = "avx512f,avx512vl")]
50266    unsafe fn test_mm_mask_cmpgt_epu32_mask() {
50267        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50268        let b = _mm_set1_epi32(1);
50269        let mask = 0b11111111;
50270        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
50271        assert_eq!(r, 0b00000011);
50272    }
50273
50274    #[simd_test(enable = "avx512f")]
50275    unsafe fn test_mm512_cmple_epu32_mask() {
50276        #[rustfmt::skip]
50277        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50278                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50279        let b = _mm512_set1_epi32(-1);
50280        assert_eq!(
50281            _mm512_cmple_epu32_mask(a, b),
50282            !_mm512_cmpgt_epu32_mask(a, b)
50283        )
50284    }
50285
50286    #[simd_test(enable = "avx512f")]
50287    unsafe fn test_mm512_mask_cmple_epu32_mask() {
50288        #[rustfmt::skip]
50289        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50290                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50291        let b = _mm512_set1_epi32(-1);
50292        let mask = 0b01111010_01111010;
50293        assert_eq!(
50294            _mm512_mask_cmple_epu32_mask(mask, a, b),
50295            0b01111010_01111010
50296        );
50297    }
50298
50299    #[simd_test(enable = "avx512f,avx512vl")]
50300    unsafe fn test_mm256_cmple_epu32_mask() {
50301        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50302        let b = _mm256_set1_epi32(1);
50303        let r = _mm256_cmple_epu32_mask(a, b);
50304        assert_eq!(r, 0b11000000)
50305    }
50306
50307    #[simd_test(enable = "avx512f,avx512vl")]
50308    unsafe fn test_mm256_mask_cmple_epu32_mask() {
50309        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50310        let b = _mm256_set1_epi32(1);
50311        let mask = 0b11111111;
50312        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
50313        assert_eq!(r, 0b11000000)
50314    }
50315
50316    #[simd_test(enable = "avx512f,avx512vl")]
50317    unsafe fn test_mm_cmple_epu32_mask() {
50318        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50319        let b = _mm_set1_epi32(1);
50320        let r = _mm_cmple_epu32_mask(a, b);
50321        assert_eq!(r, 0b00001100)
50322    }
50323
50324    #[simd_test(enable = "avx512f,avx512vl")]
50325    unsafe fn test_mm_mask_cmple_epu32_mask() {
50326        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50327        let b = _mm_set1_epi32(1);
50328        let mask = 0b11111111;
50329        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
50330        assert_eq!(r, 0b00001100)
50331    }
50332
50333    #[simd_test(enable = "avx512f")]
50334    unsafe fn test_mm512_cmpge_epu32_mask() {
50335        #[rustfmt::skip]
50336        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50337                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50338        let b = _mm512_set1_epi32(-1);
50339        assert_eq!(
50340            _mm512_cmpge_epu32_mask(a, b),
50341            !_mm512_cmplt_epu32_mask(a, b)
50342        )
50343    }
50344
50345    #[simd_test(enable = "avx512f")]
50346    unsafe fn test_mm512_mask_cmpge_epu32_mask() {
50347        #[rustfmt::skip]
50348        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50349                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50350        let b = _mm512_set1_epi32(-1);
50351        let mask = 0b01111010_01111010;
50352        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
50353    }
50354
50355    #[simd_test(enable = "avx512f,avx512vl")]
50356    unsafe fn test_mm256_cmpge_epu32_mask() {
50357        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50358        let b = _mm256_set1_epi32(1);
50359        let r = _mm256_cmpge_epu32_mask(a, b);
50360        assert_eq!(r, 0b01111111)
50361    }
50362
50363    #[simd_test(enable = "avx512f,avx512vl")]
50364    unsafe fn test_mm256_mask_cmpge_epu32_mask() {
50365        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50366        let b = _mm256_set1_epi32(1);
50367        let mask = 0b11111111;
50368        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
50369        assert_eq!(r, 0b01111111)
50370    }
50371
50372    #[simd_test(enable = "avx512f,avx512vl")]
50373    unsafe fn test_mm_cmpge_epu32_mask() {
50374        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50375        let b = _mm_set1_epi32(1);
50376        let r = _mm_cmpge_epu32_mask(a, b);
50377        assert_eq!(r, 0b00000111)
50378    }
50379
50380    #[simd_test(enable = "avx512f,avx512vl")]
50381    unsafe fn test_mm_mask_cmpge_epu32_mask() {
50382        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50383        let b = _mm_set1_epi32(1);
50384        let mask = 0b11111111;
50385        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
50386        assert_eq!(r, 0b00000111)
50387    }
50388
50389    #[simd_test(enable = "avx512f")]
50390    unsafe fn test_mm512_cmpeq_epu32_mask() {
50391        #[rustfmt::skip]
50392        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50393                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50394        #[rustfmt::skip]
50395        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50396                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50397        let m = _mm512_cmpeq_epu32_mask(b, a);
50398        assert_eq!(m, 0b11001111_11001111);
50399    }
50400
50401    #[simd_test(enable = "avx512f")]
50402    unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
50403        #[rustfmt::skip]
50404        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50405                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50406        #[rustfmt::skip]
50407        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50408                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50409        let mask = 0b01111010_01111010;
50410        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
50411        assert_eq!(r, 0b01001010_01001010);
50412    }
50413
50414    #[simd_test(enable = "avx512f,avx512vl")]
50415    unsafe fn test_mm256_cmpeq_epu32_mask() {
50416        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50417        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50418        let m = _mm256_cmpeq_epu32_mask(b, a);
50419        assert_eq!(m, 0b11001111);
50420    }
50421
50422    #[simd_test(enable = "avx512f,avx512vl")]
50423    unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
50424        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50425        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50426        let mask = 0b01111010;
50427        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
50428        assert_eq!(r, 0b01001010);
50429    }
50430
50431    #[simd_test(enable = "avx512f,avx512vl")]
50432    unsafe fn test_mm_cmpeq_epu32_mask() {
50433        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50434        let b = _mm_set_epi32(0, 1, 13, 42);
50435        let m = _mm_cmpeq_epu32_mask(b, a);
50436        assert_eq!(m, 0b00001100);
50437    }
50438
50439    #[simd_test(enable = "avx512f,avx512vl")]
50440    unsafe fn test_mm_mask_cmpeq_epu32_mask() {
50441        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50442        let b = _mm_set_epi32(0, 1, 13, 42);
50443        let mask = 0b11111111;
50444        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
50445        assert_eq!(r, 0b00001100);
50446    }
50447
50448    #[simd_test(enable = "avx512f")]
50449    unsafe fn test_mm512_cmpneq_epu32_mask() {
50450        #[rustfmt::skip]
50451        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50452                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50453        #[rustfmt::skip]
50454        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50455                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50456        let m = _mm512_cmpneq_epu32_mask(b, a);
50457        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
50458    }
50459
50460    #[simd_test(enable = "avx512f")]
50461    unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
50462        #[rustfmt::skip]
50463        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
50464                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50465        #[rustfmt::skip]
50466        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50467                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50468        let mask = 0b01111010_01111010;
50469        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
50470        assert_eq!(r, 0b00110010_00110010);
50471    }
50472
50473    #[simd_test(enable = "avx512f,avx512vl")]
50474    unsafe fn test_mm256_cmpneq_epu32_mask() {
50475        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50476        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50477        let r = _mm256_cmpneq_epu32_mask(b, a);
50478        assert_eq!(r, 0b00110000);
50479    }
50480
50481    #[simd_test(enable = "avx512f,avx512vl")]
50482    unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
50483        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50484        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50485        let mask = 0b11111111;
50486        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
50487        assert_eq!(r, 0b00110000);
50488    }
50489
50490    #[simd_test(enable = "avx512f,avx512vl")]
50491    unsafe fn test_mm_cmpneq_epu32_mask() {
50492        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50493        let b = _mm_set_epi32(0, 1, 13, 42);
50494        let r = _mm_cmpneq_epu32_mask(b, a);
50495        assert_eq!(r, 0b00000011);
50496    }
50497
50498    #[simd_test(enable = "avx512f,avx512vl")]
50499    unsafe fn test_mm_mask_cmpneq_epu32_mask() {
50500        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50501        let b = _mm_set_epi32(0, 1, 13, 42);
50502        let mask = 0b11111111;
50503        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
50504        assert_eq!(r, 0b00000011);
50505    }
50506
50507    #[simd_test(enable = "avx512f")]
50508    unsafe fn test_mm512_cmp_epu32_mask() {
50509        #[rustfmt::skip]
50510        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50511                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50512        let b = _mm512_set1_epi32(-1);
50513        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50514        assert_eq!(m, 0b11001111_11001111);
50515    }
50516
50517    #[simd_test(enable = "avx512f")]
50518    unsafe fn test_mm512_mask_cmp_epu32_mask() {
50519        #[rustfmt::skip]
50520        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50521                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50522        let b = _mm512_set1_epi32(-1);
50523        let mask = 0b01111010_01111010;
50524        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50525        assert_eq!(r, 0b01001010_01001010);
50526    }
50527
50528    #[simd_test(enable = "avx512f,avx512vl")]
50529    unsafe fn test_mm256_cmp_epu32_mask() {
50530        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50531        let b = _mm256_set1_epi32(-1);
50532        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50533        assert_eq!(m, 0b11001111);
50534    }
50535
50536    #[simd_test(enable = "avx512f,avx512vl")]
50537    unsafe fn test_mm256_mask_cmp_epu32_mask() {
50538        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50539        let b = _mm256_set1_epi32(-1);
50540        let mask = 0b11111111;
50541        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50542        assert_eq!(r, 0b11001111);
50543    }
50544
50545    #[simd_test(enable = "avx512f,avx512vl")]
50546    unsafe fn test_mm_cmp_epu32_mask() {
50547        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50548        let b = _mm_set1_epi32(1);
50549        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50550        assert_eq!(m, 0b00001000);
50551    }
50552
50553    #[simd_test(enable = "avx512f,avx512vl")]
50554    unsafe fn test_mm_mask_cmp_epu32_mask() {
50555        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50556        let b = _mm_set1_epi32(1);
50557        let mask = 0b11111111;
50558        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50559        assert_eq!(r, 0b00001000);
50560    }
50561
50562    #[simd_test(enable = "avx512f")]
50563    unsafe fn test_mm512_cmplt_epi32_mask() {
50564        #[rustfmt::skip]
50565        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50566                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50567        let b = _mm512_set1_epi32(-1);
50568        let m = _mm512_cmplt_epi32_mask(a, b);
50569        assert_eq!(m, 0b00000101_00000101);
50570    }
50571
50572    #[simd_test(enable = "avx512f")]
50573    unsafe fn test_mm512_mask_cmplt_epi32_mask() {
50574        #[rustfmt::skip]
50575        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50576                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50577        let b = _mm512_set1_epi32(-1);
50578        let mask = 0b01100110_01100110;
50579        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
50580        assert_eq!(r, 0b00000100_00000100);
50581    }
50582
50583    #[simd_test(enable = "avx512f,avx512vl")]
50584    unsafe fn test_mm256_cmplt_epi32_mask() {
50585        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50586        let b = _mm256_set1_epi32(-1);
50587        let r = _mm256_cmplt_epi32_mask(a, b);
50588        assert_eq!(r, 0b00000101);
50589    }
50590
50591    #[simd_test(enable = "avx512f,avx512vl")]
50592    unsafe fn test_mm256_mask_cmplt_epi32_mask() {
50593        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50594        let b = _mm256_set1_epi32(-1);
50595        let mask = 0b11111111;
50596        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
50597        assert_eq!(r, 0b00000101);
50598    }
50599
50600    #[simd_test(enable = "avx512f,avx512vl")]
50601    unsafe fn test_mm_cmplt_epi32_mask() {
50602        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50603        let b = _mm_set1_epi32(-1);
50604        let r = _mm_cmplt_epi32_mask(a, b);
50605        assert_eq!(r, 0b00000101);
50606    }
50607
50608    #[simd_test(enable = "avx512f,avx512vl")]
50609    unsafe fn test_mm_mask_cmplt_epi32_mask() {
50610        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50611        let b = _mm_set1_epi32(-1);
50612        let mask = 0b11111111;
50613        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
50614        assert_eq!(r, 0b00000101);
50615    }
50616
50617    #[simd_test(enable = "avx512f")]
50618    unsafe fn test_mm512_cmpgt_epi32_mask() {
50619        #[rustfmt::skip]
50620        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50621                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50622        let b = _mm512_set1_epi32(-1);
50623        let m = _mm512_cmpgt_epi32_mask(b, a);
50624        assert_eq!(m, 0b00000101_00000101);
50625    }
50626
50627    #[simd_test(enable = "avx512f")]
50628    unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
50629        #[rustfmt::skip]
50630        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50631                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50632        let b = _mm512_set1_epi32(-1);
50633        let mask = 0b01100110_01100110;
50634        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
50635        assert_eq!(r, 0b00000100_00000100);
50636    }
50637
50638    #[simd_test(enable = "avx512f,avx512vl")]
50639    unsafe fn test_mm256_cmpgt_epi32_mask() {
50640        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50641        let b = _mm256_set1_epi32(-1);
50642        let r = _mm256_cmpgt_epi32_mask(a, b);
50643        assert_eq!(r, 0b11011010);
50644    }
50645
50646    #[simd_test(enable = "avx512f,avx512vl")]
50647    unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
50648        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50649        let b = _mm256_set1_epi32(-1);
50650        let mask = 0b11111111;
50651        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
50652        assert_eq!(r, 0b11011010);
50653    }
50654
50655    #[simd_test(enable = "avx512f,avx512vl")]
50656    unsafe fn test_mm_cmpgt_epi32_mask() {
50657        let a = _mm_set_epi32(0, 1, -1, 13);
50658        let b = _mm_set1_epi32(-1);
50659        let r = _mm_cmpgt_epi32_mask(a, b);
50660        assert_eq!(r, 0b00001101);
50661    }
50662
50663    #[simd_test(enable = "avx512f,avx512vl")]
50664    unsafe fn test_mm_mask_cmpgt_epi32_mask() {
50665        let a = _mm_set_epi32(0, 1, -1, 13);
50666        let b = _mm_set1_epi32(-1);
50667        let mask = 0b11111111;
50668        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
50669        assert_eq!(r, 0b00001101);
50670    }
50671
50672    #[simd_test(enable = "avx512f")]
50673    unsafe fn test_mm512_cmple_epi32_mask() {
50674        #[rustfmt::skip]
50675        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50676                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50677        let b = _mm512_set1_epi32(-1);
50678        assert_eq!(
50679            _mm512_cmple_epi32_mask(a, b),
50680            !_mm512_cmpgt_epi32_mask(a, b)
50681        )
50682    }
50683
50684    #[simd_test(enable = "avx512f")]
50685    unsafe fn test_mm512_mask_cmple_epi32_mask() {
50686        #[rustfmt::skip]
50687        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50688                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50689        let b = _mm512_set1_epi32(-1);
50690        let mask = 0b01111010_01111010;
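        // Signed `<= -1` yields 0b00110101 per 8-lane half (bits 0, 2, 4, 5); ANDing with
        // the writemask 0b01111010 keeps only bits 4 and 5.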
50691        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
50692    }
50693
50694    #[simd_test(enable = "avx512f,avx512vl")]
50695    unsafe fn test_mm256_cmple_epi32_mask() {
50696        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50697        let b = _mm256_set1_epi32(-1);
50698        let r = _mm256_cmple_epi32_mask(a, b);
50699        assert_eq!(r, 0b00100101)
50700    }
50701
50702    #[simd_test(enable = "avx512f,avx512vl")]
50703    unsafe fn test_mm256_mask_cmple_epi32_mask() {
50704        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50705        let b = _mm256_set1_epi32(-1);
50706        let mask = 0b11111111;
50707        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
50708        assert_eq!(r, 0b00100101)
50709    }
50710
50711    #[simd_test(enable = "avx512f,avx512vl")]
50712    unsafe fn test_mm_cmple_epi32_mask() {
50713        let a = _mm_set_epi32(0, 1, -1, 200);
50714        let b = _mm_set1_epi32(-1);
50715        let r = _mm_cmple_epi32_mask(a, b);
50716        assert_eq!(r, 0b00000010)
50717    }
50718
50719    #[simd_test(enable = "avx512f,avx512vl")]
50720    unsafe fn test_mm_mask_cmple_epi32_mask() {
50721        let a = _mm_set_epi32(0, 1, -1, 200);
50722        let b = _mm_set1_epi32(-1);
50723        let mask = 0b11111111;
50724        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
50725        assert_eq!(r, 0b00000010)
50726    }
50727
50728    #[simd_test(enable = "avx512f")]
50729    unsafe fn test_mm512_cmpge_epi32_mask() {
50730        #[rustfmt::skip]
50731        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50732                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50733        let b = _mm512_set1_epi32(-1);
50734        assert_eq!(
50735            _mm512_cmpge_epi32_mask(a, b),
50736            !_mm512_cmplt_epi32_mask(a, b)
50737        )
50738    }
50739
50740    #[simd_test(enable = "avx512f")]
50741    unsafe fn test_mm512_mask_cmpge_epi32_mask() {
50742        #[rustfmt::skip]
50743        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50744                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50745        let b = _mm512_set1_epi32(-1);
50746        let mask = 0b01111010_01111010;
50747        assert_eq!(
50748            _mm512_mask_cmpge_epi32_mask(mask, a, b),
50749            0b01111010_01111010
50750        );
50751    }
50752
50753    #[simd_test(enable = "avx512f,avx512vl")]
50754    unsafe fn test_mm256_cmpge_epi32_mask() {
50755        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50756        let b = _mm256_set1_epi32(-1);
50757        let r = _mm256_cmpge_epi32_mask(a, b);
50758        assert_eq!(r, 0b11111010)
50759    }
50760
50761    #[simd_test(enable = "avx512f,avx512vl")]
50762    unsafe fn test_mm256_mask_cmpge_epi32_mask() {
50763        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50764        let b = _mm256_set1_epi32(-1);
50765        let mask = 0b11111111;
50766        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
50767        assert_eq!(r, 0b11111010)
50768    }
50769
50770    #[simd_test(enable = "avx512f,avx512vl")]
50771    unsafe fn test_mm_cmpge_epi32_mask() {
50772        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50773        let b = _mm_set1_epi32(-1);
50774        let r = _mm_cmpge_epi32_mask(a, b);
50775        assert_eq!(r, 0b00001111)
50776    }
50777
50778    #[simd_test(enable = "avx512f,avx512vl")]
50779    unsafe fn test_mm_mask_cmpge_epi32_mask() {
50780        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50781        let b = _mm_set1_epi32(-1);
50782        let mask = 0b11111111;
50783        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
50784        assert_eq!(r, 0b00001111)
50785    }
50786
50787    #[simd_test(enable = "avx512f")]
50788    unsafe fn test_mm512_cmpeq_epi32_mask() {
50789        #[rustfmt::skip]
50790        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50791                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50792        #[rustfmt::skip]
50793        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50794                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50795        let m = _mm512_cmpeq_epi32_mask(b, a);
50796        assert_eq!(m, 0b11001111_11001111);
50797    }
50798
50799    #[simd_test(enable = "avx512f")]
50800    unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
50801        #[rustfmt::skip]
50802        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50803                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50804        #[rustfmt::skip]
50805        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50806                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50807        let mask = 0b01111010_01111010;
50808        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
50809        assert_eq!(r, 0b01001010_01001010);
50810    }
50811
50812    #[simd_test(enable = "avx512f,avx512vl")]
50813    unsafe fn test_mm256_cmpeq_epi32_mask() {
50814        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50815        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50816        let m = _mm256_cmpeq_epi32_mask(b, a);
50817        assert_eq!(m, 0b11001111);
50818    }
50819
50820    #[simd_test(enable = "avx512f,avx512vl")]
50821    unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
50822        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50823        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50824        let mask = 0b01111010;
50825        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
50826        assert_eq!(r, 0b01001010);
50827    }
50828
50829    #[simd_test(enable = "avx512f,avx512vl")]
50830    unsafe fn test_mm_cmpeq_epi32_mask() {
50831        let a = _mm_set_epi32(0, 1, -1, 13);
50832        let b = _mm_set_epi32(0, 1, 13, 42);
50833        let m = _mm_cmpeq_epi32_mask(b, a);
50834        assert_eq!(m, 0b00001100);
50835    }
50836
50837    #[simd_test(enable = "avx512f,avx512vl")]
50838    unsafe fn test_mm_mask_cmpeq_epi32_mask() {
50839        let a = _mm_set_epi32(0, 1, -1, 13);
50840        let b = _mm_set_epi32(0, 1, 13, 42);
50841        let mask = 0b11111111;
50842        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
50843        assert_eq!(r, 0b00001100);
50844    }
50845
50846    #[simd_test(enable = "avx512f")]
50847    unsafe fn test_mm512_cmpneq_epi32_mask() {
50848        #[rustfmt::skip]
50849        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50850                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50851        #[rustfmt::skip]
50852        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50853                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50854        let m = _mm512_cmpneq_epi32_mask(b, a);
50855        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
50856    }
50857
50858    #[simd_test(enable = "avx512f")]
50859    unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
50860        #[rustfmt::skip]
50861        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
50862                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50863        #[rustfmt::skip]
50864        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50865                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50866        let mask = 0b01111010_01111010;
50867        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
50868        assert_eq!(r, 0b00110010_00110010)
50869    }
50870
50871    #[simd_test(enable = "avx512f,avx512vl")]
50872    unsafe fn test_mm256_cmpneq_epi32_mask() {
50873        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50874        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50875        let m = _mm256_cmpneq_epi32_mask(b, a);
50876        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
50877    }
50878
50879    #[simd_test(enable = "avx512f,avx512vl")]
50880    unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
50881        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50882        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50883        let mask = 0b11111111;
50884        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
50885        assert_eq!(r, 0b00110011)
50886    }
50887
50888    #[simd_test(enable = "avx512f,avx512vl")]
50889    unsafe fn test_mm_cmpneq_epi32_mask() {
50890        let a = _mm_set_epi32(0, 1, -1, 13);
50891        let b = _mm_set_epi32(0, 1, 13, 42);
50892        let r = _mm_cmpneq_epi32_mask(b, a);
50893        assert_eq!(r, 0b00000011)
50894    }
50895
50896    #[simd_test(enable = "avx512f,avx512vl")]
50897    unsafe fn test_mm_mask_cmpneq_epi32_mask() {
50898        let a = _mm_set_epi32(0, 1, -1, 13);
50899        let b = _mm_set_epi32(0, 1, 13, 42);
50900        let mask = 0b11111111;
50901        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
50902        assert_eq!(r, 0b00000011)
50903    }
50904
50905    #[simd_test(enable = "avx512f")]
50906    unsafe fn test_mm512_cmp_epi32_mask() {
50907        #[rustfmt::skip]
50908        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50909                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50910        let b = _mm512_set1_epi32(-1);
50911        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50912        assert_eq!(m, 0b00000101_00000101);
50913    }
50914
50915    #[simd_test(enable = "avx512f")]
50916    unsafe fn test_mm512_mask_cmp_epi32_mask() {
50917        #[rustfmt::skip]
50918        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50919                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50920        let b = _mm512_set1_epi32(-1);
50921        let mask = 0b01100110_01100110;
50922        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50923        assert_eq!(r, 0b00000100_00000100);
50924    }
50925
50926    #[simd_test(enable = "avx512f,avx512vl")]
50927    unsafe fn test_mm256_cmp_epi32_mask() {
50928        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50929        let b = _mm256_set1_epi32(-1);
50930        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50931        assert_eq!(m, 0b00000101);
50932    }
50933
50934    #[simd_test(enable = "avx512f,avx512vl")]
50935    unsafe fn test_mm256_mask_cmp_epi32_mask() {
50936        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50937        let b = _mm256_set1_epi32(-1);
50938        let mask = 0b01100110;
50939        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50940        assert_eq!(r, 0b00000100);
50941    }
50942
50943    #[simd_test(enable = "avx512f,avx512vl")]
50944    unsafe fn test_mm_cmp_epi32_mask() {
50945        let a = _mm_set_epi32(0, 1, -1, 13);
50946        let b = _mm_set1_epi32(1);
50947        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50948        assert_eq!(m, 0b00001010);
50949    }
50950
50951    #[simd_test(enable = "avx512f,avx512vl")]
50952    unsafe fn test_mm_mask_cmp_epi32_mask() {
50953        let a = _mm_set_epi32(0, 1, -1, 13);
50954        let b = _mm_set1_epi32(1);
50955        let mask = 0b11111111;
50956        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50957        assert_eq!(r, 0b00001010);
50958    }
50959
50960    #[simd_test(enable = "avx512f")]
50961    unsafe fn test_mm512_set_epi8() {
50962        let r = _mm512_set1_epi8(2);
50963        assert_eq_m512i(
50964            r,
50965            _mm512_set_epi8(
50966                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50967                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50968                2, 2, 2, 2, 2, 2, 2, 2,
50969            ),
50970        )
50971    }
50972
50973    #[simd_test(enable = "avx512f")]
50974    unsafe fn test_mm512_set_epi16() {
50975        let r = _mm512_set1_epi16(2);
50976        assert_eq_m512i(
50977            r,
50978            _mm512_set_epi16(
50979                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50980                2, 2, 2, 2,
50981            ),
50982        )
50983    }
50984
50985    #[simd_test(enable = "avx512f")]
50986    unsafe fn test_mm512_set_epi32() {
50987        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50988        assert_eq_m512i(
50989            r,
50990            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50991        )
50992    }
50993
50994    #[simd_test(enable = "avx512f")]
50995    unsafe fn test_mm512_setr_epi32() {
50996        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50997        assert_eq_m512i(
50998            r,
50999            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
51000        )
51001    }
51002
51003    #[simd_test(enable = "avx512f")]
51004    unsafe fn test_mm512_set1_epi8() {
51005        let r = _mm512_set_epi8(
51006            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51007            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51008            2, 2, 2, 2, 2, 2,
51009        );
51010        assert_eq_m512i(r, _mm512_set1_epi8(2));
51011    }
51012
51013    #[simd_test(enable = "avx512f")]
51014    unsafe fn test_mm512_set1_epi16() {
51015        let r = _mm512_set_epi16(
51016            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51017            2, 2, 2,
51018        );
51019        assert_eq_m512i(r, _mm512_set1_epi16(2));
51020    }
51021
51022    #[simd_test(enable = "avx512f")]
51023    unsafe fn test_mm512_set1_epi32() {
51024        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
51025        assert_eq_m512i(r, _mm512_set1_epi32(2));
51026    }
51027
51028    #[simd_test(enable = "avx512f")]
51029    unsafe fn test_mm512_setzero_si512() {
51030        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
51031    }
51032
51033    #[simd_test(enable = "avx512f")]
51034    unsafe fn test_mm512_setzero_epi32() {
51035        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
51036    }
51037
51038    #[simd_test(enable = "avx512f")]
51039    unsafe fn test_mm512_set_ps() {
51040        let r = _mm512_setr_ps(
51041            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51042        );
51043        assert_eq_m512(
51044            r,
51045            _mm512_set_ps(
51046                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
51047            ),
51048        )
51049    }
51050
51051    #[simd_test(enable = "avx512f")]
51052    unsafe fn test_mm512_setr_ps() {
51053        let r = _mm512_set_ps(
51054            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51055        );
51056        assert_eq_m512(
51057            r,
51058            _mm512_setr_ps(
51059                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
51060            ),
51061        )
51062    }
51063
51064    #[simd_test(enable = "avx512f")]
51065    unsafe fn test_mm512_set1_ps() {
51066        #[rustfmt::skip]
51067        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
51068                                     2., 2., 2., 2., 2., 2., 2., 2.);
51069        assert_eq_m512(expected, _mm512_set1_ps(2.));
51070    }
51071
51072    #[simd_test(enable = "avx512f")]
51073    unsafe fn test_mm512_set4_epi32() {
51074        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51075        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
51076    }
51077
51078    #[simd_test(enable = "avx512f")]
51079    unsafe fn test_mm512_set4_ps() {
51080        let r = _mm512_set_ps(
51081            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51082        );
51083        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
51084    }
51085
51086    #[simd_test(enable = "avx512f")]
51087    unsafe fn test_mm512_setr4_epi32() {
51088        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51089        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
51090    }
51091
51092    #[simd_test(enable = "avx512f")]
51093    unsafe fn test_mm512_setr4_ps() {
51094        let r = _mm512_set_ps(
51095            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51096        );
51097        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
51098    }
51099
51100    #[simd_test(enable = "avx512f")]
51101    unsafe fn test_mm512_setzero_ps() {
51102        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
51103    }
51104
51105    #[simd_test(enable = "avx512f")]
51106    unsafe fn test_mm512_setzero() {
51107        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
51108    }
51109
51110    #[simd_test(enable = "avx512f")]
51111    unsafe fn test_mm512_loadu_pd() {
51112        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
51113        let p = a.as_ptr();
51114        let r = _mm512_loadu_pd(black_box(p));
51115        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
51116        assert_eq_m512d(r, e);
51117    }
51118
51119    #[simd_test(enable = "avx512f")]
51120    unsafe fn test_mm512_storeu_pd() {
51121        let a = _mm512_set1_pd(9.);
51122        let mut r = _mm512_undefined_pd();
51123        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
51124        assert_eq_m512d(r, a);
51125    }
51126
51127    #[simd_test(enable = "avx512f")]
51128    unsafe fn test_mm512_loadu_ps() {
51129        let a = &[
51130            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51131        ];
51132        let p = a.as_ptr();
51133        let r = _mm512_loadu_ps(black_box(p));
51134        let e = _mm512_setr_ps(
51135            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51136        );
51137        assert_eq_m512(r, e);
51138    }
51139
51140    #[simd_test(enable = "avx512f")]
51141    unsafe fn test_mm512_storeu_ps() {
51142        let a = _mm512_set1_ps(9.);
51143        let mut r = _mm512_undefined_ps();
51144        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
51145        assert_eq_m512(r, a);
51146    }
51147
51148    #[simd_test(enable = "avx512f")]
51149    unsafe fn test_mm512_mask_loadu_epi32() {
51150        let src = _mm512_set1_epi32(42);
51151        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51152        let p = a.as_ptr();
51153        let m = 0b11101000_11001010;
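        // Bit i of the mask selects element i from memory; elements with a clear bit keep
        // the value from `src`.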
51154        let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
51155        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51156        assert_eq_m512i(r, e);
51157    }
51158
51159    #[simd_test(enable = "avx512f")]
51160    unsafe fn test_mm512_maskz_loadu_epi32() {
51161        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51162        let p = a.as_ptr();
51163        let m = 0b11101000_11001010;
51164        let r = _mm512_maskz_loadu_epi32(m, black_box(p));
51165        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51166        assert_eq_m512i(r, e);
51167    }
51168
51169    #[simd_test(enable = "avx512f")]
51170    unsafe fn test_mm512_mask_load_epi32() {
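        // The aligned (non-`u`) load requires `mem_addr` to be 64-byte aligned, hence the
        // aligned wrapper struct.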
51171        #[repr(align(64))]
51172        struct Align {
51173            data: [i32; 16], // 64 bytes
51174        }
51175        let src = _mm512_set1_epi32(42);
51176        let a = Align {
51177            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51178        };
51179        let p = a.data.as_ptr();
51180        let m = 0b11101000_11001010;
51181        let r = _mm512_mask_load_epi32(src, m, black_box(p));
51182        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51183        assert_eq_m512i(r, e);
51184    }
51185
51186    #[simd_test(enable = "avx512f")]
51187    unsafe fn test_mm512_maskz_load_epi32() {
51188        #[repr(align(64))]
51189        struct Align {
51190            data: [i32; 16], // 64 bytes
51191        }
51192        let a = Align {
51193            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51194        };
51195        let p = a.data.as_ptr();
51196        let m = 0b11101000_11001010;
51197        let r = _mm512_maskz_load_epi32(m, black_box(p));
51198        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51199        assert_eq_m512i(r, e);
51200    }
51201
51202    #[simd_test(enable = "avx512f")]
51203    unsafe fn test_mm512_mask_storeu_epi32() {
51204        let mut r = [42_i32; 16];
51205        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51206        let m = 0b11101000_11001010;
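        // Only elements whose mask bit is set are written to memory; the other slots of
        // `r` keep their initial 42.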
51207        _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51208        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51209        assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
51210    }
51211
51212    #[simd_test(enable = "avx512f")]
51213    unsafe fn test_mm512_mask_store_epi32() {
51214        #[repr(align(64))]
51215        struct Align {
51216            data: [i32; 16],
51217        }
51218        let mut r = Align { data: [42; 16] };
51219        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51220        let m = 0b11101000_11001010;
51221        _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51222        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51223        assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
51224    }
51225
51226    #[simd_test(enable = "avx512f")]
51227    unsafe fn test_mm512_mask_loadu_epi64() {
51228        let src = _mm512_set1_epi64(42);
51229        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51230        let p = a.as_ptr();
51231        let m = 0b11001010;
51232        let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
51233        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51234        assert_eq_m512i(r, e);
51235    }
51236
51237    #[simd_test(enable = "avx512f")]
51238    unsafe fn test_mm512_maskz_loadu_epi64() {
51239        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51240        let p = a.as_ptr();
51241        let m = 0b11001010;
51242        let r = _mm512_maskz_loadu_epi64(m, black_box(p));
51243        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51244        assert_eq_m512i(r, e);
51245    }
51246
51247    #[simd_test(enable = "avx512f")]
51248    unsafe fn test_mm512_mask_load_epi64() {
51249        #[repr(align(64))]
51250        struct Align {
51251            data: [i64; 8], // 64 bytes
51252        }
51253        let src = _mm512_set1_epi64(42);
51254        let a = Align {
51255            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51256        };
51257        let p = a.data.as_ptr();
51258        let m = 0b11001010;
51259        let r = _mm512_mask_load_epi64(src, m, black_box(p));
51260        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51261        assert_eq_m512i(r, e);
51262    }
51263
51264    #[simd_test(enable = "avx512f")]
51265    unsafe fn test_mm512_maskz_load_epi64() {
51266        #[repr(align(64))]
51267        struct Align {
51268            data: [i64; 8], // 64 bytes
51269        }
51270        let a = Align {
51271            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51272        };
51273        let p = a.data.as_ptr();
51274        let m = 0b11001010;
51275        let r = _mm512_maskz_load_epi64(m, black_box(p));
51276        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51277        assert_eq_m512i(r, e);
51278    }
51279
51280    #[simd_test(enable = "avx512f")]
51281    unsafe fn test_mm512_mask_storeu_epi64() {
51282        let mut r = [42_i64; 8];
51283        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51284        let m = 0b11001010;
51285        _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51286        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51287        assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
51288    }
51289
51290    #[simd_test(enable = "avx512f")]
51291    unsafe fn test_mm512_mask_store_epi64() {
51292        #[repr(align(64))]
51293        struct Align {
51294            data: [i64; 8],
51295        }
51296        let mut r = Align { data: [42; 8] };
51297        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51298        let m = 0b11001010;
51299        let p = r.data.as_mut_ptr();
51300        _mm512_mask_store_epi64(p, m, a);
51301        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51302        assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
51303    }
51304
51305    #[simd_test(enable = "avx512f")]
51306    unsafe fn test_mm512_mask_loadu_ps() {
51307        let src = _mm512_set1_ps(42.0);
51308        let a = &[
51309            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51310            16.0,
51311        ];
51312        let p = a.as_ptr();
51313        let m = 0b11101000_11001010;
51314        let r = _mm512_mask_loadu_ps(src, m, black_box(p));
51315        let e = _mm512_setr_ps(
51316            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51317            16.0,
51318        );
51319        assert_eq_m512(r, e);
51320    }
51321
51322    #[simd_test(enable = "avx512f")]
51323    unsafe fn test_mm512_maskz_loadu_ps() {
51324        let a = &[
51325            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51326            16.0,
51327        ];
51328        let p = a.as_ptr();
51329        let m = 0b11101000_11001010;
51330        let r = _mm512_maskz_loadu_ps(m, black_box(p));
51331        let e = _mm512_setr_ps(
51332            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51333        );
51334        assert_eq_m512(r, e);
51335    }
51336
51337    #[simd_test(enable = "avx512f")]
51338    unsafe fn test_mm512_mask_load_ps() {
51339        #[repr(align(64))]
51340        struct Align {
51341            data: [f32; 16], // 64 bytes
51342        }
51343        let src = _mm512_set1_ps(42.0);
51344        let a = Align {
51345            data: [
51346                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51347                15.0, 16.0,
51348            ],
51349        };
51350        let p = a.data.as_ptr();
51351        let m = 0b11101000_11001010;
51352        let r = _mm512_mask_load_ps(src, m, black_box(p));
51353        let e = _mm512_setr_ps(
51354            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51355            16.0,
51356        );
51357        assert_eq_m512(r, e);
51358    }
51359
51360    #[simd_test(enable = "avx512f")]
51361    unsafe fn test_mm512_maskz_load_ps() {
51362        #[repr(align(64))]
51363        struct Align {
51364            data: [f32; 16], // 64 bytes
51365        }
51366        let a = Align {
51367            data: [
51368                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51369                15.0, 16.0,
51370            ],
51371        };
51372        let p = a.data.as_ptr();
51373        let m = 0b11101000_11001010;
51374        let r = _mm512_maskz_load_ps(m, black_box(p));
51375        let e = _mm512_setr_ps(
51376            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51377        );
51378        assert_eq_m512(r, e);
51379    }
51380
51381    #[simd_test(enable = "avx512f")]
51382    unsafe fn test_mm512_mask_storeu_ps() {
51383        let mut r = [42_f32; 16];
51384        let a = _mm512_setr_ps(
51385            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51386        );
51387        let m = 0b11101000_11001010;
51388        _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
51389        let e = _mm512_setr_ps(
51390            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51391            16.0,
51392        );
51393        assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
51394    }
51395
51396    #[simd_test(enable = "avx512f")]
51397    unsafe fn test_mm512_mask_store_ps() {
51398        #[repr(align(64))]
51399        struct Align {
51400            data: [f32; 16],
51401        }
51402        let mut r = Align { data: [42.0; 16] };
51403        let a = _mm512_setr_ps(
51404            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51405        );
51406        let m = 0b11101000_11001010;
51407        _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
51408        let e = _mm512_setr_ps(
51409            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51410            16.0,
51411        );
51412        assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
51413    }
51414
51415    #[simd_test(enable = "avx512f")]
51416    unsafe fn test_mm512_mask_loadu_pd() {
51417        let src = _mm512_set1_pd(42.0);
51418        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51419        let p = a.as_ptr();
51420        let m = 0b11001010;
51421        let r = _mm512_mask_loadu_pd(src, m, black_box(p));
51422        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51423        assert_eq_m512d(r, e);
51424    }
51425
51426    #[simd_test(enable = "avx512f")]
51427    unsafe fn test_mm512_maskz_loadu_pd() {
51428        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51429        let p = a.as_ptr();
51430        let m = 0b11001010;
51431        let r = _mm512_maskz_loadu_pd(m, black_box(p));
51432        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51433        assert_eq_m512d(r, e);
51434    }
51435
51436    #[simd_test(enable = "avx512f")]
51437    unsafe fn test_mm512_mask_load_pd() {
51438        #[repr(align(64))]
51439        struct Align {
51440            data: [f64; 8], // 64 bytes
51441        }
51442        let src = _mm512_set1_pd(42.0);
51443        let a = Align {
51444            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51445        };
51446        let p = a.data.as_ptr();
51447        let m = 0b11001010;
51448        let r = _mm512_mask_load_pd(src, m, black_box(p));
51449        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51450        assert_eq_m512d(r, e);
51451    }
51452
51453    #[simd_test(enable = "avx512f")]
51454    unsafe fn test_mm512_maskz_load_pd() {
51455        #[repr(align(64))]
51456        struct Align {
51457            data: [f64; 8], // 64 bytes
51458        }
51459        let a = Align {
51460            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51461        };
51462        let p = a.data.as_ptr();
51463        let m = 0b11001010;
51464        let r = _mm512_maskz_load_pd(m, black_box(p));
51465        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51466        assert_eq_m512d(r, e);
51467    }
51468
51469    #[simd_test(enable = "avx512f")]
51470    unsafe fn test_mm512_mask_storeu_pd() {
51471        let mut r = [42_f64; 8];
51472        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51473        let m = 0b11001010;
51474        _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
51475        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51476        assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
51477    }
51478
51479    #[simd_test(enable = "avx512f")]
51480    unsafe fn test_mm512_mask_store_pd() {
51481        #[repr(align(64))]
51482        struct Align {
51483            data: [f64; 8],
51484        }
51485        let mut r = Align { data: [42.0; 8] };
51486        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51487        let m = 0b11001010;
51488        _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
51489        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51490        assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
51491    }
51492
51493    #[simd_test(enable = "avx512f,avx512vl")]
51494    unsafe fn test_mm256_mask_loadu_epi32() {
51495        let src = _mm256_set1_epi32(42);
51496        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51497        let p = a.as_ptr();
51498        let m = 0b11001010;
51499        let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
51500        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51501        assert_eq_m256i(r, e);
51502    }
51503
51504    #[simd_test(enable = "avx512f,avx512vl")]
51505    unsafe fn test_mm256_maskz_loadu_epi32() {
51506        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51507        let p = a.as_ptr();
51508        let m = 0b11001010;
51509        let r = _mm256_maskz_loadu_epi32(m, black_box(p));
51510        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51511        assert_eq_m256i(r, e);
51512    }
51513
51514    #[simd_test(enable = "avx512f,avx512vl")]
51515    unsafe fn test_mm256_mask_load_epi32() {
51516        #[repr(align(32))]
51517        struct Align {
51518            data: [i32; 8], // 32 bytes
51519        }
51520        let src = _mm256_set1_epi32(42);
51521        let a = Align {
51522            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51523        };
51524        let p = a.data.as_ptr();
51525        let m = 0b11001010;
51526        let r = _mm256_mask_load_epi32(src, m, black_box(p));
51527        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51528        assert_eq_m256i(r, e);
51529    }
51530
51531    #[simd_test(enable = "avx512f,avx512vl")]
51532    unsafe fn test_mm256_maskz_load_epi32() {
51533        #[repr(align(32))]
51534        struct Align {
51535            data: [i32; 8], // 32 bytes
51536        }
51537        let a = Align {
51538            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51539        };
51540        let p = a.data.as_ptr();
51541        let m = 0b11001010;
51542        let r = _mm256_maskz_load_epi32(m, black_box(p));
51543        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51544        assert_eq_m256i(r, e);
51545    }
51546
51547    #[simd_test(enable = "avx512f,avx512vl")]
51548    unsafe fn test_mm256_mask_storeu_epi32() {
51549        let mut r = [42_i32; 8];
51550        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51551        let m = 0b11001010;
51552        _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51553        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51554        assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
51555    }
51556
51557    #[simd_test(enable = "avx512f,avx512vl")]
51558    unsafe fn test_mm256_mask_store_epi32() {
51559        #[repr(align(64))]
51560        struct Align {
51561            data: [i32; 8],
51562        }
51563        let mut r = Align { data: [42; 8] };
51564        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51565        let m = 0b11001010;
51566        _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51567        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51568        assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
51569    }
51570
51571    #[simd_test(enable = "avx512f,avx512vl")]
51572    unsafe fn test_mm256_mask_loadu_epi64() {
51573        let src = _mm256_set1_epi64x(42);
51574        let a = &[1_i64, 2, 3, 4];
51575        let p = a.as_ptr();
51576        let m = 0b1010;
51577        let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
51578        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51579        assert_eq_m256i(r, e);
51580    }
51581
51582    #[simd_test(enable = "avx512f,avx512vl")]
51583    unsafe fn test_mm256_maskz_loadu_epi64() {
51584        let a = &[1_i64, 2, 3, 4];
51585        let p = a.as_ptr();
51586        let m = 0b1010;
51587        let r = _mm256_maskz_loadu_epi64(m, black_box(p));
51588        let e = _mm256_setr_epi64x(0, 2, 0, 4);
51589        assert_eq_m256i(r, e);
51590    }
51591
51592    #[simd_test(enable = "avx512f,avx512vl")]
51593    unsafe fn test_mm256_mask_load_epi64() {
51594        #[repr(align(32))]
51595        struct Align {
51596            data: [i64; 4], // 32 bytes
51597        }
51598        let src = _mm256_set1_epi64x(42);
51599        let a = Align {
51600            data: [1_i64, 2, 3, 4],
51601        };
51602        let p = a.data.as_ptr();
51603        let m = 0b1010;
51604        let r = _mm256_mask_load_epi64(src, m, black_box(p));
51605        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51606        assert_eq_m256i(r, e);
51607    }
51608
51609    #[simd_test(enable = "avx512f,avx512vl")]
51610    unsafe fn test_mm256_maskz_load_epi64() {
51611        #[repr(align(32))]
51612        struct Align {
51613            data: [i64; 4], // 32 bytes
51614        }
51615        let a = Align {
51616            data: [1_i64, 2, 3, 4],
51617        };
51618        let p = a.data.as_ptr();
51619        let m = 0b1010;
51620        let r = _mm256_maskz_load_epi64(m, black_box(p));
51621        let e = _mm256_setr_epi64x(0, 2, 0, 4);
51622        assert_eq_m256i(r, e);
51623    }
51624
51625    #[simd_test(enable = "avx512f,avx512vl")]
51626    unsafe fn test_mm256_mask_storeu_epi64() {
51627        let mut r = [42_i64; 4];
51628        let a = _mm256_setr_epi64x(1, 2, 3, 4);
51629        let m = 0b1010;
51630        _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51631        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51632        assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
51633    }
51634
51635    #[simd_test(enable = "avx512f,avx512vl")]
51636    unsafe fn test_mm256_mask_store_epi64() {
51637        #[repr(align(32))]
51638        struct Align {
51639            data: [i64; 4],
51640        }
51641        let mut r = Align { data: [42; 4] };
51642        let a = _mm256_setr_epi64x(1, 2, 3, 4);
51643        let m = 0b1010;
51644        _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51645        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51646        assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
51647    }
51648
51649    #[simd_test(enable = "avx512f,avx512vl")]
51650    unsafe fn test_mm256_mask_loadu_ps() {
51651        let src = _mm256_set1_ps(42.0);
51652        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51653        let p = a.as_ptr();
51654        let m = 0b11001010;
51655        let r = _mm256_mask_loadu_ps(src, m, black_box(p));
51656        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51657        assert_eq_m256(r, e);
51658    }
51659
51660    #[simd_test(enable = "avx512f,avx512vl")]
51661    unsafe fn test_mm256_maskz_loadu_ps() {
51662        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51663        let p = a.as_ptr();
51664        let m = 0b11001010;
51665        let r = _mm256_maskz_loadu_ps(m, black_box(p));
51666        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51667        assert_eq_m256(r, e);
51668    }
51669
51670    #[simd_test(enable = "avx512f,avx512vl")]
51671    unsafe fn test_mm256_mask_load_ps() {
51672        #[repr(align(32))]
51673        struct Align {
51674            data: [f32; 8], // 32 bytes
51675        }
51676        let src = _mm256_set1_ps(42.0);
51677        let a = Align {
51678            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51679        };
51680        let p = a.data.as_ptr();
51681        let m = 0b11001010;
51682        let r = _mm256_mask_load_ps(src, m, black_box(p));
51683        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51684        assert_eq_m256(r, e);
51685    }
51686
51687    #[simd_test(enable = "avx512f,avx512vl")]
51688    unsafe fn test_mm256_maskz_load_ps() {
51689        #[repr(align(32))]
51690        struct Align {
51691            data: [f32; 8], // 32 bytes
51692        }
51693        let a = Align {
51694            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51695        };
51696        let p = a.data.as_ptr();
51697        let m = 0b11001010;
51698        let r = _mm256_maskz_load_ps(m, black_box(p));
51699        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51700        assert_eq_m256(r, e);
51701    }
51702
51703    #[simd_test(enable = "avx512f,avx512vl")]
51704    unsafe fn test_mm256_mask_storeu_ps() {
51705        let mut r = [42_f32; 8];
51706        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51707        let m = 0b11001010;
51708        _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
51709        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51710        assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
51711    }
51712
51713    #[simd_test(enable = "avx512f,avx512vl")]
51714    unsafe fn test_mm256_mask_store_ps() {
51715        #[repr(align(32))]
51716        struct Align {
51717            data: [f32; 8],
51718        }
51719        let mut r = Align { data: [42.0; 8] };
51720        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51721        let m = 0b11001010;
51722        _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
51723        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51724        assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
51725    }
51726
51727    #[simd_test(enable = "avx512f,avx512vl")]
51728    unsafe fn test_mm256_mask_loadu_pd() {
51729        let src = _mm256_set1_pd(42.0);
51730        let a = &[1.0_f64, 2.0, 3.0, 4.0];
51731        let p = a.as_ptr();
51732        let m = 0b1010;
51733        let r = _mm256_mask_loadu_pd(src, m, black_box(p));
51734        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51735        assert_eq_m256d(r, e);
51736    }
51737
51738    #[simd_test(enable = "avx512f,avx512vl")]
51739    unsafe fn test_mm256_maskz_loadu_pd() {
51740        let a = &[1.0_f64, 2.0, 3.0, 4.0];
51741        let p = a.as_ptr();
51742        let m = 0b1010;
51743        let r = _mm256_maskz_loadu_pd(m, black_box(p));
51744        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51745        assert_eq_m256d(r, e);
51746    }
51747
51748    #[simd_test(enable = "avx512f,avx512vl")]
51749    unsafe fn test_mm256_mask_load_pd() {
51750        #[repr(align(32))]
51751        struct Align {
51752            data: [f64; 4], // 32 bytes
51753        }
51754        let src = _mm256_set1_pd(42.0);
51755        let a = Align {
51756            data: [1.0_f64, 2.0, 3.0, 4.0],
51757        };
51758        let p = a.data.as_ptr();
51759        let m = 0b1010;
51760        let r = _mm256_mask_load_pd(src, m, black_box(p));
51761        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51762        assert_eq_m256d(r, e);
51763    }
51764
51765    #[simd_test(enable = "avx512f,avx512vl")]
51766    unsafe fn test_mm256_maskz_load_pd() {
51767        #[repr(align(32))]
51768        struct Align {
51769            data: [f64; 4], // 32 bytes
51770        }
51771        let a = Align {
51772            data: [1.0_f64, 2.0, 3.0, 4.0],
51773        };
51774        let p = a.data.as_ptr();
51775        let m = 0b1010;
51776        let r = _mm256_maskz_load_pd(m, black_box(p));
51777        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51778        assert_eq_m256d(r, e);
51779    }
51780
51781    #[simd_test(enable = "avx512f,avx512vl")]
51782    unsafe fn test_mm256_mask_storeu_pd() {
51783        let mut r = [42_f64; 4];
51784        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51785        let m = 0b1010;
51786        _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
51787        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51788        assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
51789    }
51790
51791    #[simd_test(enable = "avx512f,avx512vl")]
51792    unsafe fn test_mm256_mask_store_pd() {
51793        #[repr(align(32))]
51794        struct Align {
51795            data: [f64; 4],
51796        }
51797        let mut r = Align { data: [42.0; 4] };
51798        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51799        let m = 0b1010;
51800        _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
51801        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51802        assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
51803    }
51804
51805    #[simd_test(enable = "avx512f,avx512vl")]
51806    unsafe fn test_mm_mask_loadu_epi32() {
51807        let src = _mm_set1_epi32(42);
51808        let a = &[1_i32, 2, 3, 4];
51809        let p = a.as_ptr();
51810        let m = 0b1010;
51811        let r = _mm_mask_loadu_epi32(src, m, black_box(p));
51812        let e = _mm_setr_epi32(42, 2, 42, 4);
51813        assert_eq_m128i(r, e);
51814    }
51815
51816    #[simd_test(enable = "avx512f,avx512vl")]
51817    unsafe fn test_mm_maskz_loadu_epi32() {
51818        let a = &[1_i32, 2, 3, 4];
51819        let p = a.as_ptr();
51820        let m = 0b1010;
51821        let r = _mm_maskz_loadu_epi32(m, black_box(p));
51822        let e = _mm_setr_epi32(0, 2, 0, 4);
51823        assert_eq_m128i(r, e);
51824    }
51825
51826    #[simd_test(enable = "avx512f,avx512vl")]
51827    unsafe fn test_mm_mask_load_epi32() {
51828        #[repr(align(16))]
51829        struct Align {
51830            data: [i32; 4], // 16 bytes
51831        }
51832        let src = _mm_set1_epi32(42);
51833        let a = Align {
51834            data: [1_i32, 2, 3, 4],
51835        };
51836        let p = a.data.as_ptr();
51837        let m = 0b1010;
51838        let r = _mm_mask_load_epi32(src, m, black_box(p));
51839        let e = _mm_setr_epi32(42, 2, 42, 4);
51840        assert_eq_m128i(r, e);
51841    }
51842
51843    #[simd_test(enable = "avx512f,avx512vl")]
51844    unsafe fn test_mm_maskz_load_epi32() {
51845        #[repr(align(16))]
51846        struct Align {
51847            data: [i32; 4], // 16 bytes
51848        }
51849        let a = Align {
51850            data: [1_i32, 2, 3, 4],
51851        };
51852        let p = a.data.as_ptr();
51853        let m = 0b1010;
51854        let r = _mm_maskz_load_epi32(m, black_box(p));
51855        let e = _mm_setr_epi32(0, 2, 0, 4);
51856        assert_eq_m128i(r, e);
51857    }
51858
51859    #[simd_test(enable = "avx512f,avx512vl")]
51860    unsafe fn test_mm_mask_storeu_epi32() {
51861        let mut r = [42_i32; 4];
51862        let a = _mm_setr_epi32(1, 2, 3, 4);
51863        let m = 0b1010;
51864        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51865        let e = _mm_setr_epi32(42, 2, 42, 4);
51866        assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
51867    }
51868
51869    #[simd_test(enable = "avx512f,avx512vl")]
51870    unsafe fn test_mm_mask_store_epi32() {
51871        #[repr(align(16))]
51872        struct Align {
51873            data: [i32; 4], // 16 bytes
51874        }
51875        let mut r = Align { data: [42; 4] };
51876        let a = _mm_setr_epi32(1, 2, 3, 4);
51877        let m = 0b1010;
51878        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51879        let e = _mm_setr_epi32(42, 2, 42, 4);
51880        assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
51881    }
51882
51883    #[simd_test(enable = "avx512f,avx512vl")]
51884    unsafe fn test_mm_mask_loadu_epi64() {
51885        let src = _mm_set1_epi64x(42);
51886        let a = &[1_i64, 2];
51887        let p = a.as_ptr();
51888        let m = 0b10;
51889        let r = _mm_mask_loadu_epi64(src, m, black_box(p));
51890        let e = _mm_setr_epi64x(42, 2);
51891        assert_eq_m128i(r, e);
51892    }
51893
51894    #[simd_test(enable = "avx512f,avx512vl")]
51895    unsafe fn test_mm_maskz_loadu_epi64() {
51896        let a = &[1_i64, 2];
51897        let p = a.as_ptr();
51898        let m = 0b10;
51899        let r = _mm_maskz_loadu_epi64(m, black_box(p));
51900        let e = _mm_setr_epi64x(0, 2);
51901        assert_eq_m128i(r, e);
51902    }
51903
51904    #[simd_test(enable = "avx512f,avx512vl")]
51905    unsafe fn test_mm_mask_load_epi64() {
51906        #[repr(align(16))]
51907        struct Align {
51908            data: [i64; 2], // 16 bytes
51909        }
51910        let src = _mm_set1_epi64x(42);
51911        let a = Align { data: [1_i64, 2] };
51912        let p = a.data.as_ptr();
51913        let m = 0b10;
51914        let r = _mm_mask_load_epi64(src, m, black_box(p));
51915        let e = _mm_setr_epi64x(42, 2);
51916        assert_eq_m128i(r, e);
51917    }
51918
51919    #[simd_test(enable = "avx512f,avx512vl")]
51920    unsafe fn test_mm_maskz_load_epi64() {
51921        #[repr(align(16))]
51922        struct Align {
51923            data: [i64; 2], // 16 bytes
51924        }
51925        let a = Align { data: [1_i64, 2] };
51926        let p = a.data.as_ptr();
51927        let m = 0b10;
51928        let r = _mm_maskz_load_epi64(m, black_box(p));
51929        let e = _mm_setr_epi64x(0, 2);
51930        assert_eq_m128i(r, e);
51931    }
51932
51933    #[simd_test(enable = "avx512f,avx512vl")]
51934    unsafe fn test_mm_mask_storeu_epi64() {
51935        let mut r = [42_i64; 2];
51936        let a = _mm_setr_epi64x(1, 2);
51937        let m = 0b10;
51938        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51939        let e = _mm_setr_epi64x(42, 2);
51940        assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
51941    }
51942
51943    #[simd_test(enable = "avx512f,avx512vl")]
51944    unsafe fn test_mm_mask_store_epi64() {
51945        #[repr(align(16))]
51946        struct Align {
51947            data: [i64; 2], // 16 bytes
51948        }
51949        let mut r = Align { data: [42; 2] };
51950        let a = _mm_setr_epi64x(1, 2);
51951        let m = 0b10;
51952        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51953        let e = _mm_setr_epi64x(42, 2);
51954        assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
51955    }
51956
51957    #[simd_test(enable = "avx512f,avx512vl")]
51958    unsafe fn test_mm_mask_loadu_ps() {
51959        let src = _mm_set1_ps(42.0);
51960        let a = &[1.0_f32, 2.0, 3.0, 4.0];
51961        let p = a.as_ptr();
51962        let m = 0b1010;
51963        let r = _mm_mask_loadu_ps(src, m, black_box(p));
51964        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51965        assert_eq_m128(r, e);
51966    }
51967
51968    #[simd_test(enable = "avx512f,avx512vl")]
51969    unsafe fn test_mm_maskz_loadu_ps() {
51970        let a = &[1.0_f32, 2.0, 3.0, 4.0];
51971        let p = a.as_ptr();
51972        let m = 0b1010;
51973        let r = _mm_maskz_loadu_ps(m, black_box(p));
51974        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51975        assert_eq_m128(r, e);
51976    }
51977
51978    #[simd_test(enable = "avx512f,avx512vl")]
51979    unsafe fn test_mm_mask_load_ps() {
51980        #[repr(align(16))]
51981        struct Align {
51982            data: [f32; 4], // 16 bytes
51983        }
51984        let src = _mm_set1_ps(42.0);
51985        let a = Align {
51986            data: [1.0_f32, 2.0, 3.0, 4.0],
51987        };
51988        let p = a.data.as_ptr();
51989        let m = 0b1010;
51990        let r = _mm_mask_load_ps(src, m, black_box(p));
51991        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51992        assert_eq_m128(r, e);
51993    }
51994
51995    #[simd_test(enable = "avx512f,avx512vl")]
51996    unsafe fn test_mm_maskz_load_ps() {
51997        #[repr(align(16))]
51998        struct Align {
51999            data: [f32; 4], // 16 bytes
52000        }
52001        let a = Align {
52002            data: [1.0_f32, 2.0, 3.0, 4.0],
52003        };
52004        let p = a.data.as_ptr();
52005        let m = 0b1010;
52006        let r = _mm_maskz_load_ps(m, black_box(p));
52007        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
52008        assert_eq_m128(r, e);
52009    }
52010
52011    #[simd_test(enable = "avx512f,avx512vl")]
52012    unsafe fn test_mm_mask_storeu_ps() {
52013        let mut r = [42_f32; 4];
52014        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
52015        let m = 0b1010;
52016        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
52017        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
52018        assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
52019    }
52020
52021    #[simd_test(enable = "avx512f,avx512vl")]
52022    unsafe fn test_mm_mask_store_ps() {
52023        #[repr(align(16))]
52024        struct Align {
52025            data: [f32; 4], // 16 bytes
52026        }
52027        let mut r = Align { data: [42.0; 4] };
52028        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
52029        let m = 0b1010;
52030        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
52031        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
52032        assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
52033    }
52034
52035    #[simd_test(enable = "avx512f,avx512vl")]
52036    unsafe fn test_mm_mask_loadu_pd() {
52037        let src = _mm_set1_pd(42.0);
52038        let a = &[1.0_f64, 2.0];
52039        let p = a.as_ptr();
52040        let m = 0b10;
52041        let r = _mm_mask_loadu_pd(src, m, black_box(p));
52042        let e = _mm_setr_pd(42.0, 2.0);
52043        assert_eq_m128d(r, e);
52044    }
52045
52046    #[simd_test(enable = "avx512f,avx512vl")]
52047    unsafe fn test_mm_maskz_loadu_pd() {
52048        let a = &[1.0_f64, 2.0];
52049        let p = a.as_ptr();
52050        let m = 0b10;
52051        let r = _mm_maskz_loadu_pd(m, black_box(p));
52052        let e = _mm_setr_pd(0.0, 2.0);
52053        assert_eq_m128d(r, e);
52054    }
52055
52056    #[simd_test(enable = "avx512f,avx512vl")]
52057    unsafe fn test_mm_mask_load_pd() {
52058        #[repr(align(16))]
52059        struct Align {
52060            data: [f64; 2], // 16 bytes
52061        }
52062        let src = _mm_set1_pd(42.0);
52063        let a = Align {
52064            data: [1.0_f64, 2.0],
52065        };
52066        let p = a.data.as_ptr();
52067        let m = 0b10;
52068        let r = _mm_mask_load_pd(src, m, black_box(p));
52069        let e = _mm_setr_pd(42.0, 2.0);
52070        assert_eq_m128d(r, e);
52071    }
52072
52073    #[simd_test(enable = "avx512f,avx512vl")]
52074    unsafe fn test_mm_maskz_load_pd() {
52075        #[repr(align(16))]
52076        struct Align {
52077            data: [f64; 2], // 16 bytes
52078        }
52079        let a = Align {
52080            data: [1.0_f64, 2.0],
52081        };
52082        let p = a.data.as_ptr();
52083        let m = 0b10;
52084        let r = _mm_maskz_load_pd(m, black_box(p));
52085        let e = _mm_setr_pd(0.0, 2.0);
52086        assert_eq_m128d(r, e);
52087    }
52088
52089    #[simd_test(enable = "avx512f")]
52090    unsafe fn test_mm_mask_load_ss() {
52091        #[repr(align(16))]
52092        struct Align {
52093            data: f32,
52094        }
52095        let src = _mm_set_ss(2.0);
52096        let mem = Align { data: 1.0 };
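        // With mask bit 0 set the scalar is loaded from memory; with it clear the
        // lower lane is copied from `src`. The upper three lanes are zeroed either way.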
52097        let r = _mm_mask_load_ss(src, 0b1, &mem.data);
52098        assert_eq_m128(r, _mm_set_ss(1.0));
52099        let r = _mm_mask_load_ss(src, 0b0, &mem.data);
52100        assert_eq_m128(r, _mm_set_ss(2.0));
52101    }
52102
52103    #[simd_test(enable = "avx512f")]
52104    unsafe fn test_mm_maskz_load_ss() {
52105        #[repr(align(16))]
52106        struct Align {
52107            data: f32,
52108        }
52109        let mem = Align { data: 1.0 };
52110        let r = _mm_maskz_load_ss(0b1, &mem.data);
52111        assert_eq_m128(r, _mm_set_ss(1.0));
52112        let r = _mm_maskz_load_ss(0b0, &mem.data);
52113        assert_eq_m128(r, _mm_set_ss(0.0));
52114    }
52115
52116    #[simd_test(enable = "avx512f")]
52117    unsafe fn test_mm_mask_load_sd() {
52118        #[repr(align(16))]
52119        struct Align {
52120            data: f64,
52121        }
52122        let src = _mm_set_sd(2.0);
52123        let mem = Align { data: 1.0 };
52124        let r = _mm_mask_load_sd(src, 0b1, &mem.data);
52125        assert_eq_m128d(r, _mm_set_sd(1.0));
52126        let r = _mm_mask_load_sd(src, 0b0, &mem.data);
52127        assert_eq_m128d(r, _mm_set_sd(2.0));
52128    }
52129
52130    #[simd_test(enable = "avx512f")]
52131    unsafe fn test_mm_maskz_load_sd() {
52132        #[repr(align(16))]
52133        struct Align {
52134            data: f64,
52135        }
52136        let mem = Align { data: 1.0 };
52137        let r = _mm_maskz_load_sd(0b1, &mem.data);
52138        assert_eq_m128d(r, _mm_set_sd(1.0));
52139        let r = _mm_maskz_load_sd(0b0, &mem.data);
52140        assert_eq_m128d(r, _mm_set_sd(0.0));
52141    }
52142
52143    #[simd_test(enable = "avx512f,avx512vl")]
52144    unsafe fn test_mm_mask_storeu_pd() {
52145        let mut r = [42_f64; 2];
52146        let a = _mm_setr_pd(1.0, 2.0);
52147        let m = 0b10;
52148        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
52149        let e = _mm_setr_pd(42.0, 2.0);
52150        assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
52151    }
52152
52153    #[simd_test(enable = "avx512f,avx512vl")]
52154    unsafe fn test_mm_mask_store_pd() {
52155        #[repr(align(16))]
52156        struct Align {
52157            data: [f64; 2], // 16 bytes
52158        }
52159        let mut r = Align { data: [42.0; 2] };
52160        let a = _mm_setr_pd(1.0, 2.0);
52161        let m = 0b10;
52162        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
52163        let e = _mm_setr_pd(42.0, 2.0);
52164        assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
52165    }
52166
52167    #[simd_test(enable = "avx512f")]
52168    unsafe fn test_mm_mask_store_ss() {
52169        #[repr(align(16))]
52170        struct Align {
52171            data: f32,
52172        }
52173        let a = _mm_set_ss(2.0);
52174        let mut mem = Align { data: 1.0 };
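        // The scalar is stored only when mask bit 0 is set, so the second call
        // (mask 0) leaves the previously stored 2.0 untouched.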
52175        _mm_mask_store_ss(&mut mem.data, 0b1, a);
52176        assert_eq!(mem.data, 2.0);
52177        _mm_mask_store_ss(&mut mem.data, 0b0, a);
52178        assert_eq!(mem.data, 2.0);
52179    }
52180
52181    #[simd_test(enable = "avx512f")]
52182    unsafe fn test_mm_mask_store_sd() {
52183        #[repr(align(16))]
52184        struct Align {
52185            data: f64,
52186        }
52187        let a = _mm_set_sd(2.0);
52188        let mut mem = Align { data: 1.0 };
52189        _mm_mask_store_sd(&mut mem.data, 0b1, a);
52190        assert_eq!(mem.data, 2.0);
52191        _mm_mask_store_sd(&mut mem.data, 0b0, a);
52192        assert_eq!(mem.data, 2.0);
52193    }
52194
52195    #[simd_test(enable = "avx512f")]
52196    unsafe fn test_mm512_setr_pd() {
52197        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52198        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52199    }
52200
52201    #[simd_test(enable = "avx512f")]
52202    unsafe fn test_mm512_set_pd() {
52203        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52204        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52205    }
52206
52207    #[simd_test(enable = "avx512f")]
52208    unsafe fn test_mm512_rol_epi32() {
52209        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
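        // Rotating left by one wraps the set bit of `1 << 31` around to bit 0
        // (`1 << 0`); every other element simply doubles to 2.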
52210        let r = _mm512_rol_epi32::<1>(a);
52211        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52212        assert_eq_m512i(r, e);
52213    }
52214
52215    #[simd_test(enable = "avx512f")]
52216    unsafe fn test_mm512_mask_rol_epi32() {
52217        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52218        let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
52219        assert_eq_m512i(r, a);
52220        let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
52221        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52222        assert_eq_m512i(r, e);
52223    }
52224
52225    #[simd_test(enable = "avx512f")]
52226    unsafe fn test_mm512_maskz_rol_epi32() {
52227        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
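        // `_mm512_set_epi32` takes arguments from element 15 down to element 0,
        // so `1 << 31` is element 0 and is covered by the low half of the mask.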
52228        let r = _mm512_maskz_rol_epi32::<1>(0, a);
52229        assert_eq_m512i(r, _mm512_setzero_si512());
52230        let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
52231        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52232        assert_eq_m512i(r, e);
52233    }
52234
52235    #[simd_test(enable = "avx512f,avx512vl")]
52236    unsafe fn test_mm256_rol_epi32() {
52237        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52238        let r = _mm256_rol_epi32::<1>(a);
52239        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52240        assert_eq_m256i(r, e);
52241    }
52242
52243    #[simd_test(enable = "avx512f,avx512vl")]
52244    unsafe fn test_mm256_mask_rol_epi32() {
52245        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52246        let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
52247        assert_eq_m256i(r, a);
52248        let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
52249        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52250        assert_eq_m256i(r, e);
52251    }
52252
52253    #[simd_test(enable = "avx512f,avx512vl")]
52254    unsafe fn test_mm256_maskz_rol_epi32() {
52255        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52256        let r = _mm256_maskz_rol_epi32::<1>(0, a);
52257        assert_eq_m256i(r, _mm256_setzero_si256());
52258        let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
52259        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52260        assert_eq_m256i(r, e);
52261    }
52262
52263    #[simd_test(enable = "avx512f,avx512vl")]
52264    unsafe fn test_mm_rol_epi32() {
52265        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52266        let r = _mm_rol_epi32::<1>(a);
52267        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52268        assert_eq_m128i(r, e);
52269    }
52270
52271    #[simd_test(enable = "avx512f,avx512vl")]
52272    unsafe fn test_mm_mask_rol_epi32() {
52273        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52274        let r = _mm_mask_rol_epi32::<1>(a, 0, a);
52275        assert_eq_m128i(r, a);
52276        let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
52277        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52278        assert_eq_m128i(r, e);
52279    }
52280
52281    #[simd_test(enable = "avx512f,avx512vl")]
52282    unsafe fn test_mm_maskz_rol_epi32() {
52283        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52284        let r = _mm_maskz_rol_epi32::<1>(0, a);
52285        assert_eq_m128i(r, _mm_setzero_si128());
52286        let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
52287        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52288        assert_eq_m128i(r, e);
52289    }
52290
52291    #[simd_test(enable = "avx512f")]
52292    unsafe fn test_mm512_ror_epi32() {
52293        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52294        let r = _mm512_ror_epi32::<1>(a);
52295        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52296        assert_eq_m512i(r, e);
52297    }
52298
52299    #[simd_test(enable = "avx512f")]
52300    unsafe fn test_mm512_mask_ror_epi32() {
52301        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52302        let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
52303        assert_eq_m512i(r, a);
52304        let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
52305        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52306        assert_eq_m512i(r, e);
52307    }
52308
52309    #[simd_test(enable = "avx512f")]
52310    unsafe fn test_mm512_maskz_ror_epi32() {
52311        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52312        let r = _mm512_maskz_ror_epi32::<1>(0, a);
52313        assert_eq_m512i(r, _mm512_setzero_si512());
52314        let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
52315        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52316        assert_eq_m512i(r, e);
52317    }
52318
52319    #[simd_test(enable = "avx512f,avx512vl")]
52320    unsafe fn test_mm256_ror_epi32() {
52321        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52322        let r = _mm256_ror_epi32::<1>(a);
52323        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52324        assert_eq_m256i(r, e);
52325    }
52326
52327    #[simd_test(enable = "avx512f,avx512vl")]
52328    unsafe fn test_mm256_mask_ror_epi32() {
52329        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52330        let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
52331        assert_eq_m256i(r, a);
52332        let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
52333        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52334        assert_eq_m256i(r, e);
52335    }
52336
52337    #[simd_test(enable = "avx512f,avx512vl")]
52338    unsafe fn test_mm256_maskz_ror_epi32() {
52339        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52340        let r = _mm256_maskz_ror_epi32::<1>(0, a);
52341        assert_eq_m256i(r, _mm256_setzero_si256());
52342        let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
52343        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52344        assert_eq_m256i(r, e);
52345    }
52346
52347    #[simd_test(enable = "avx512f,avx512vl")]
52348    unsafe fn test_mm_ror_epi32() {
52349        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52350        let r = _mm_ror_epi32::<1>(a);
52351        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52352        assert_eq_m128i(r, e);
52353    }
52354
52355    #[simd_test(enable = "avx512f,avx512vl")]
52356    unsafe fn test_mm_mask_ror_epi32() {
52357        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52358        let r = _mm_mask_ror_epi32::<1>(a, 0, a);
52359        assert_eq_m128i(r, a);
52360        let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
52361        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52362        assert_eq_m128i(r, e);
52363    }
52364
52365    #[simd_test(enable = "avx512f,avx512vl")]
52366    unsafe fn test_mm_maskz_ror_epi32() {
52367        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52368        let r = _mm_maskz_ror_epi32::<1>(0, a);
52369        assert_eq_m128i(r, _mm_setzero_si128());
52370        let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
52371        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52372        assert_eq_m128i(r, e);
52373    }
52374
52375    #[simd_test(enable = "avx512f")]
52376    unsafe fn test_mm512_slli_epi32() {
52377        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
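        // Shifting left by one pushes the `1 << 31` bit out (that element becomes 0)
        // and doubles every other element.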
52378        let r = _mm512_slli_epi32::<1>(a);
52379        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52380        assert_eq_m512i(r, e);
52381    }
52382
52383    #[simd_test(enable = "avx512f")]
52384    unsafe fn test_mm512_mask_slli_epi32() {
52385        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52386        let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
52387        assert_eq_m512i(r, a);
52388        let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
52389        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52390        assert_eq_m512i(r, e);
52391    }
52392
52393    #[simd_test(enable = "avx512f")]
52394    unsafe fn test_mm512_maskz_slli_epi32() {
52395        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52396        let r = _mm512_maskz_slli_epi32::<1>(0, a);
52397        assert_eq_m512i(r, _mm512_setzero_si512());
52398        let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
52399        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52400        assert_eq_m512i(r, e);
52401    }
52402
52403    #[simd_test(enable = "avx512f,avx512vl")]
52404    unsafe fn test_mm256_mask_slli_epi32() {
52405        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52406        let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
52407        assert_eq_m256i(r, a);
52408        let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
52409        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52410        assert_eq_m256i(r, e);
52411    }
52412
52413    #[simd_test(enable = "avx512f,avx512vl")]
52414    unsafe fn test_mm256_maskz_slli_epi32() {
52415        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52416        let r = _mm256_maskz_slli_epi32::<1>(0, a);
52417        assert_eq_m256i(r, _mm256_setzero_si256());
52418        let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
52419        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52420        assert_eq_m256i(r, e);
52421    }
52422
52423    #[simd_test(enable = "avx512f,avx512vl")]
52424    unsafe fn test_mm_mask_slli_epi32() {
52425        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52426        let r = _mm_mask_slli_epi32::<1>(a, 0, a);
52427        assert_eq_m128i(r, a);
52428        let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
52429        let e = _mm_set_epi32(0, 2, 2, 2);
52430        assert_eq_m128i(r, e);
52431    }
52432
52433    #[simd_test(enable = "avx512f,avx512vl")]
52434    unsafe fn test_mm_maskz_slli_epi32() {
52435        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52436        let r = _mm_maskz_slli_epi32::<1>(0, a);
52437        assert_eq_m128i(r, _mm_setzero_si128());
52438        let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
52439        let e = _mm_set_epi32(0, 2, 2, 2);
52440        assert_eq_m128i(r, e);
52441    }
52442
52443    #[simd_test(enable = "avx512f")]
52444    unsafe fn test_mm512_srli_epi32() {
52445        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52446        let r = _mm512_srli_epi32::<1>(a);
52447        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52448        assert_eq_m512i(r, e);
52449    }
52450
52451    #[simd_test(enable = "avx512f")]
52452    unsafe fn test_mm512_mask_srli_epi32() {
52453        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52454        let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
52455        assert_eq_m512i(r, a);
52456        let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
52457        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52458        assert_eq_m512i(r, e);
52459    }
52460
52461    #[simd_test(enable = "avx512f")]
52462    unsafe fn test_mm512_maskz_srli_epi32() {
52463        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52464        let r = _mm512_maskz_srli_epi32::<1>(0, a);
52465        assert_eq_m512i(r, _mm512_setzero_si512());
52466        let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
52467        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
52468        assert_eq_m512i(r, e);
52469    }
52470
52471    #[simd_test(enable = "avx512f,avx512vl")]
52472    unsafe fn test_mm256_mask_srli_epi32() {
52473        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52474        let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
52475        assert_eq_m256i(r, a);
52476        let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
52477        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52478        assert_eq_m256i(r, e);
52479    }
52480
52481    #[simd_test(enable = "avx512f,avx512vl")]
52482    unsafe fn test_mm256_maskz_srli_epi32() {
52483        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52484        let r = _mm256_maskz_srli_epi32::<1>(0, a);
52485        assert_eq_m256i(r, _mm256_setzero_si256());
52486        let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
52487        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52488        assert_eq_m256i(r, e);
52489    }
52490
52491    #[simd_test(enable = "avx512f,avx512vl")]
52492    unsafe fn test_mm_mask_srli_epi32() {
52493        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52494        let r = _mm_mask_srli_epi32::<1>(a, 0, a);
52495        assert_eq_m128i(r, a);
52496        let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
52497        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52498        assert_eq_m128i(r, e);
52499    }
52500
52501    #[simd_test(enable = "avx512f,avx512vl")]
52502    unsafe fn test_mm_maskz_srli_epi32() {
52503        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52504        let r = _mm_maskz_srli_epi32::<1>(0, a);
52505        assert_eq_m128i(r, _mm_setzero_si128());
52506        let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
52507        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52508        assert_eq_m128i(r, e);
52509    }
52510
52511    #[simd_test(enable = "avx512f")]
52512    unsafe fn test_mm512_rolv_epi32() {
52513        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52514        let b = _mm512_set1_epi32(1);
52515        let r = _mm512_rolv_epi32(a, b);
52516        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52517        assert_eq_m512i(r, e);
52518    }
52519
52520    #[simd_test(enable = "avx512f")]
52521    unsafe fn test_mm512_mask_rolv_epi32() {
52522        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52523        let b = _mm512_set1_epi32(1);
52524        let r = _mm512_mask_rolv_epi32(a, 0, a, b);
52525        assert_eq_m512i(r, a);
52526        let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
52527        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52528        assert_eq_m512i(r, e);
52529    }
52530
52531    #[simd_test(enable = "avx512f")]
52532    unsafe fn test_mm512_maskz_rolv_epi32() {
52533        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52534        let b = _mm512_set1_epi32(1);
52535        let r = _mm512_maskz_rolv_epi32(0, a, b);
52536        assert_eq_m512i(r, _mm512_setzero_si512());
52537        let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
52538        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52539        assert_eq_m512i(r, e);
52540    }
52541
52542    #[simd_test(enable = "avx512f,avx512vl")]
52543    unsafe fn test_mm256_rolv_epi32() {
52544        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52545        let b = _mm256_set1_epi32(1);
52546        let r = _mm256_rolv_epi32(a, b);
52547        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52548        assert_eq_m256i(r, e);
52549    }
52550
52551    #[simd_test(enable = "avx512f,avx512vl")]
52552    unsafe fn test_mm256_mask_rolv_epi32() {
52553        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52554        let b = _mm256_set1_epi32(1);
52555        let r = _mm256_mask_rolv_epi32(a, 0, a, b);
52556        assert_eq_m256i(r, a);
52557        let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
52558        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52559        assert_eq_m256i(r, e);
52560    }
52561
52562    #[simd_test(enable = "avx512f,avx512vl")]
52563    unsafe fn test_mm256_maskz_rolv_epi32() {
52564        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52565        let b = _mm256_set1_epi32(1);
52566        let r = _mm256_maskz_rolv_epi32(0, a, b);
52567        assert_eq_m256i(r, _mm256_setzero_si256());
52568        let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
52569        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52570        assert_eq_m256i(r, e);
52571    }
52572
52573    #[simd_test(enable = "avx512f,avx512vl")]
52574    unsafe fn test_mm_rolv_epi32() {
52575        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52576        let b = _mm_set1_epi32(1);
52577        let r = _mm_rolv_epi32(a, b);
52578        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52579        assert_eq_m128i(r, e);
52580    }
52581
52582    #[simd_test(enable = "avx512f,avx512vl")]
52583    unsafe fn test_mm_mask_rolv_epi32() {
52584        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52585        let b = _mm_set1_epi32(1);
52586        let r = _mm_mask_rolv_epi32(a, 0, a, b);
52587        assert_eq_m128i(r, a);
52588        let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
52589        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52590        assert_eq_m128i(r, e);
52591    }
52592
52593    #[simd_test(enable = "avx512f,avx512vl")]
52594    unsafe fn test_mm_maskz_rolv_epi32() {
52595        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52596        let b = _mm_set1_epi32(1);
52597        let r = _mm_maskz_rolv_epi32(0, a, b);
52598        assert_eq_m128i(r, _mm_setzero_si128());
52599        let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
52600        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52601        assert_eq_m128i(r, e);
52602    }
52603
52604    #[simd_test(enable = "avx512f")]
52605    unsafe fn test_mm512_rorv_epi32() {
52606        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52607        let b = _mm512_set1_epi32(1);
52608        let r = _mm512_rorv_epi32(a, b);
52609        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52610        assert_eq_m512i(r, e);
52611    }
52612
52613    #[simd_test(enable = "avx512f")]
52614    unsafe fn test_mm512_mask_rorv_epi32() {
52615        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52616        let b = _mm512_set1_epi32(1);
52617        let r = _mm512_mask_rorv_epi32(a, 0, a, b);
52618        assert_eq_m512i(r, a);
52619        let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
52620        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52621        assert_eq_m512i(r, e);
52622    }
52623
52624    #[simd_test(enable = "avx512f")]
52625    unsafe fn test_mm512_maskz_rorv_epi32() {
52626        let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52627        let b = _mm512_set1_epi32(1);
52628        let r = _mm512_maskz_rorv_epi32(0, a, b);
52629        assert_eq_m512i(r, _mm512_setzero_si512());
52630        let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
52631        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52632        assert_eq_m512i(r, e);
52633    }
52634
52635    #[simd_test(enable = "avx512f,avx512vl")]
52636    unsafe fn test_mm256_rorv_epi32() {
52637        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52638        let b = _mm256_set1_epi32(1);
52639        let r = _mm256_rorv_epi32(a, b);
52640        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52641        assert_eq_m256i(r, e);
52642    }
52643
52644    #[simd_test(enable = "avx512f,avx512vl")]
52645    unsafe fn test_mm256_mask_rorv_epi32() {
52646        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52647        let b = _mm256_set1_epi32(1);
52648        let r = _mm256_mask_rorv_epi32(a, 0, a, b);
52649        assert_eq_m256i(r, a);
52650        let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
52651        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52652        assert_eq_m256i(r, e);
52653    }
52654
52655    #[simd_test(enable = "avx512f,avx512vl")]
52656    unsafe fn test_mm256_maskz_rorv_epi32() {
52657        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52658        let b = _mm256_set1_epi32(1);
52659        let r = _mm256_maskz_rorv_epi32(0, a, b);
52660        assert_eq_m256i(r, _mm256_setzero_si256());
52661        let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
52662        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52663        assert_eq_m256i(r, e);
52664    }
52665
52666    #[simd_test(enable = "avx512f,avx512vl")]
52667    unsafe fn test_mm_rorv_epi32() {
52668        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52669        let b = _mm_set1_epi32(1);
52670        let r = _mm_rorv_epi32(a, b);
52671        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52672        assert_eq_m128i(r, e);
52673    }
52674
52675    #[simd_test(enable = "avx512f,avx512vl")]
52676    unsafe fn test_mm_mask_rorv_epi32() {
52677        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52678        let b = _mm_set1_epi32(1);
52679        let r = _mm_mask_rorv_epi32(a, 0, a, b);
52680        assert_eq_m128i(r, a);
52681        let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
52682        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52683        assert_eq_m128i(r, e);
52684    }
52685
52686    #[simd_test(enable = "avx512f,avx512vl")]
52687    unsafe fn test_mm_maskz_rorv_epi32() {
52688        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52689        let b = _mm_set1_epi32(1);
52690        let r = _mm_maskz_rorv_epi32(0, a, b);
52691        assert_eq_m128i(r, _mm_setzero_si128());
52692        let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
52693        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52694        assert_eq_m128i(r, e);
52695    }
52696
52697    #[simd_test(enable = "avx512f")]
52698    unsafe fn test_mm512_sllv_epi32() {
52699        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52700        let count = _mm512_set1_epi32(1);
52701        let r = _mm512_sllv_epi32(a, count);
52702        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52703        assert_eq_m512i(r, e);
52704    }
52705
52706    #[simd_test(enable = "avx512f")]
52707    unsafe fn test_mm512_mask_sllv_epi32() {
52708        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52709        let count = _mm512_set1_epi32(1);
52710        let r = _mm512_mask_sllv_epi32(a, 0, a, count);
52711        assert_eq_m512i(r, a);
52712        let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
52713        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52714        assert_eq_m512i(r, e);
52715    }
52716
52717    #[simd_test(enable = "avx512f")]
52718    unsafe fn test_mm512_maskz_sllv_epi32() {
52719        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52720        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52721        let r = _mm512_maskz_sllv_epi32(0, a, count);
52722        assert_eq_m512i(r, _mm512_setzero_si512());
52723        let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
52724        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52725        assert_eq_m512i(r, e);
52726    }
52727
52728    #[simd_test(enable = "avx512f,avx512vl")]
52729    unsafe fn test_mm256_mask_sllv_epi32() {
52730        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52731        let count = _mm256_set1_epi32(1);
52732        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
52733        assert_eq_m256i(r, a);
52734        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
52735        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52736        assert_eq_m256i(r, e);
52737    }
52738
52739    #[simd_test(enable = "avx512f,avx512vl")]
52740    unsafe fn test_mm256_maskz_sllv_epi32() {
52741        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52742        let count = _mm256_set1_epi32(1);
52743        let r = _mm256_maskz_sllv_epi32(0, a, count);
52744        assert_eq_m256i(r, _mm256_setzero_si256());
52745        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
52746        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52747        assert_eq_m256i(r, e);
52748    }
52749
52750    #[simd_test(enable = "avx512f,avx512vl")]
52751    unsafe fn test_mm_mask_sllv_epi32() {
52752        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52753        let count = _mm_set1_epi32(1);
52754        let r = _mm_mask_sllv_epi32(a, 0, a, count);
52755        assert_eq_m128i(r, a);
52756        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
52757        let e = _mm_set_epi32(0, 2, 2, 2);
52758        assert_eq_m128i(r, e);
52759    }
52760
52761    #[simd_test(enable = "avx512f,avx512vl")]
52762    unsafe fn test_mm_maskz_sllv_epi32() {
52763        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52764        let count = _mm_set1_epi32(1);
52765        let r = _mm_maskz_sllv_epi32(0, a, count);
52766        assert_eq_m128i(r, _mm_setzero_si128());
52767        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
52768        let e = _mm_set_epi32(0, 2, 2, 2);
52769        assert_eq_m128i(r, e);
52770    }
52771
52772    #[simd_test(enable = "avx512f")]
52773    unsafe fn test_mm512_srlv_epi32() {
52774        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52775        let count = _mm512_set1_epi32(1);
52776        let r = _mm512_srlv_epi32(a, count);
52777        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52778        assert_eq_m512i(r, e);
52779    }
52780
52781    #[simd_test(enable = "avx512f")]
52782    unsafe fn test_mm512_mask_srlv_epi32() {
52783        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52784        let count = _mm512_set1_epi32(1);
52785        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
52786        assert_eq_m512i(r, a);
52787        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
52788        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52789        assert_eq_m512i(r, e);
52790    }
52791
52792    #[simd_test(enable = "avx512f")]
52793    unsafe fn test_mm512_maskz_srlv_epi32() {
52794        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52795        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52796        let r = _mm512_maskz_srlv_epi32(0, a, count);
52797        assert_eq_m512i(r, _mm512_setzero_si512());
52798        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
52799        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52800        assert_eq_m512i(r, e);
52801    }
52802
52803    #[simd_test(enable = "avx512f,avx512vl")]
52804    unsafe fn test_mm256_mask_srlv_epi32() {
52805        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52806        let count = _mm256_set1_epi32(1);
52807        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
52808        assert_eq_m256i(r, a);
52809        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
52810        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52811        assert_eq_m256i(r, e);
52812    }
52813
52814    #[simd_test(enable = "avx512f,avx512vl")]
52815    unsafe fn test_mm256_maskz_srlv_epi32() {
52816        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52817        let count = _mm256_set1_epi32(1);
52818        let r = _mm256_maskz_srlv_epi32(0, a, count);
52819        assert_eq_m256i(r, _mm256_setzero_si256());
52820        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
52821        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52822        assert_eq_m256i(r, e);
52823    }
52824
52825    #[simd_test(enable = "avx512f,avx512vl")]
52826    unsafe fn test_mm_mask_srlv_epi32() {
52827        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52828        let count = _mm_set1_epi32(1);
52829        let r = _mm_mask_srlv_epi32(a, 0, a, count);
52830        assert_eq_m128i(r, a);
52831        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
52832        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52833        assert_eq_m128i(r, e);
52834    }
52835
52836    #[simd_test(enable = "avx512f,avx512vl")]
52837    unsafe fn test_mm_maskz_srlv_epi32() {
52838        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52839        let count = _mm_set1_epi32(1);
52840        let r = _mm_maskz_srlv_epi32(0, a, count);
52841        assert_eq_m128i(r, _mm_setzero_si128());
52842        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
52843        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52844        assert_eq_m128i(r, e);
52845    }
52846
52847    #[simd_test(enable = "avx512f")]
52848    unsafe fn test_mm512_sll_epi32() {
52849        #[rustfmt::skip]
52850        let a = _mm512_set_epi32(
52851            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52852            0, 0, 0, 0,
52853            0, 0, 0, 0,
52854            0, 0, 0, 0,
52855        );
52856        let count = _mm_set_epi32(0, 0, 0, 2);
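        // The shift amount is taken from the low 64 bits of `count`, i.e. 2 here.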
52857        let r = _mm512_sll_epi32(a, count);
52858        #[rustfmt::skip]
52859        let e = _mm512_set_epi32(
52860            0, 1 << 2, 1 << 3, 1 << 4,
52861            0, 0, 0, 0,
52862            0, 0, 0, 0,
52863            0, 0, 0, 0,
52864        );
52865        assert_eq_m512i(r, e);
52866    }
52867
52868    #[simd_test(enable = "avx512f")]
52869    unsafe fn test_mm512_mask_sll_epi32() {
52870        #[rustfmt::skip]
52871        let a = _mm512_set_epi32(
52872            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52873            0, 0, 0, 0,
52874            0, 0, 0, 0,
52875            0, 0, 0, 0,
52876        );
52877        let count = _mm_set_epi32(0, 0, 0, 2);
52878        let r = _mm512_mask_sll_epi32(a, 0, a, count);
52879        assert_eq_m512i(r, a);
52880        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
52881        #[rustfmt::skip]
52882        let e = _mm512_set_epi32(
52883            0, 1 << 2, 1 << 3, 1 << 4,
52884            0, 0, 0, 0,
52885            0, 0, 0, 0,
52886            0, 0, 0, 0,
52887        );
52888        assert_eq_m512i(r, e);
52889    }
52890
52891    #[simd_test(enable = "avx512f")]
52892    unsafe fn test_mm512_maskz_sll_epi32() {
52893        #[rustfmt::skip]
52894        let a = _mm512_set_epi32(
52895            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52896            0, 0, 0, 0,
52897            0, 0, 0, 0,
52898            0, 0, 0, 1 << 31,
52899        );
52900        let count = _mm_set_epi32(2, 0, 0, 2);
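        // The `2` in the top element of `count` lies outside the low 64 bits and is
        // ignored; the shift amount is 2. The only nonzero element kept by the mask
        // is element 0 (`1 << 31`), which shifts out, so the result is all zeros.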
52901        let r = _mm512_maskz_sll_epi32(0, a, count);
52902        assert_eq_m512i(r, _mm512_setzero_si512());
52903        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
52904        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52905        assert_eq_m512i(r, e);
52906    }
52907
52908    #[simd_test(enable = "avx512f,avx512vl")]
52909    unsafe fn test_mm256_mask_sll_epi32() {
52910        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52911        let count = _mm_set_epi32(0, 0, 0, 1);
52912        let r = _mm256_mask_sll_epi32(a, 0, a, count);
52913        assert_eq_m256i(r, a);
52914        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
52915        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52916        assert_eq_m256i(r, e);
52917    }
52918
52919    #[simd_test(enable = "avx512f,avx512vl")]
52920    unsafe fn test_mm256_maskz_sll_epi32() {
52921        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52922        let count = _mm_set_epi32(0, 0, 0, 1);
52923        let r = _mm256_maskz_sll_epi32(0, a, count);
52924        assert_eq_m256i(r, _mm256_setzero_si256());
52925        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
52926        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52927        assert_eq_m256i(r, e);
52928    }
52929
52930    #[simd_test(enable = "avx512f,avx512vl")]
52931    unsafe fn test_mm_mask_sll_epi32() {
52932        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52933        let count = _mm_set_epi32(0, 0, 0, 1);
52934        let r = _mm_mask_sll_epi32(a, 0, a, count);
52935        assert_eq_m128i(r, a);
52936        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
52937        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52938        assert_eq_m128i(r, e);
52939    }
52940
52941    #[simd_test(enable = "avx512f,avx512vl")]
52942    unsafe fn test_mm_maskz_sll_epi32() {
52943        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52944        let count = _mm_set_epi32(0, 0, 0, 1);
52945        let r = _mm_maskz_sll_epi32(0, a, count);
52946        assert_eq_m128i(r, _mm_setzero_si128());
52947        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
52948        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52949        assert_eq_m128i(r, e);
52950    }
52951
52952    #[simd_test(enable = "avx512f")]
52953    unsafe fn test_mm512_srl_epi32() {
52954        #[rustfmt::skip]
52955        let a = _mm512_set_epi32(
52956            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52957            0, 0, 0, 0,
52958            0, 0, 0, 0,
52959            0, 0, 0, 0,
52960        );
52961        let count = _mm_set_epi32(0, 0, 0, 2);
52962        let r = _mm512_srl_epi32(a, count);
52963        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52964        assert_eq_m512i(r, e);
52965    }
52966
52967    #[simd_test(enable = "avx512f")]
52968    unsafe fn test_mm512_mask_srl_epi32() {
52969        #[rustfmt::skip]
52970        let a = _mm512_set_epi32(
52971            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52972            0, 0, 0, 0,
52973            0, 0, 0, 0,
52974            0, 0, 0, 0,
52975        );
52976        let count = _mm_set_epi32(0, 0, 0, 2);
52977        let r = _mm512_mask_srl_epi32(a, 0, a, count);
52978        assert_eq_m512i(r, a);
52979        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
52980        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52981        assert_eq_m512i(r, e);
52982    }
52983
52984    #[simd_test(enable = "avx512f")]
52985    unsafe fn test_mm512_maskz_srl_epi32() {
52986        #[rustfmt::skip]
52987        let a = _mm512_set_epi32(
52988            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52989            0, 0, 0, 0,
52990            0, 0, 0, 0,
52991            0, 0, 0, 1 << 31,
52992        );
52993        let count = _mm_set_epi32(2, 0, 0, 2);
52994        let r = _mm512_maskz_srl_epi32(0, a, count);
52995        assert_eq_m512i(r, _mm512_setzero_si512());
52996        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
52997        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
52998        assert_eq_m512i(r, e);
52999    }
53000
53001    #[simd_test(enable = "avx512f,avx512vl")]
53002    unsafe fn test_mm256_mask_srl_epi32() {
53003        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53004        let count = _mm_set_epi32(0, 0, 0, 1);
53005        let r = _mm256_mask_srl_epi32(a, 0, a, count);
53006        assert_eq_m256i(r, a);
53007        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
53008        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53009        assert_eq_m256i(r, e);
53010    }
53011
53012    #[simd_test(enable = "avx512f,avx512vl")]
53013    unsafe fn test_mm256_maskz_srl_epi32() {
53014        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53015        let count = _mm_set_epi32(0, 0, 0, 1);
53016        let r = _mm256_maskz_srl_epi32(0, a, count);
53017        assert_eq_m256i(r, _mm256_setzero_si256());
53018        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
53019        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53020        assert_eq_m256i(r, e);
53021    }
53022
53023    #[simd_test(enable = "avx512f,avx512vl")]
53024    unsafe fn test_mm_mask_srl_epi32() {
53025        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53026        let count = _mm_set_epi32(0, 0, 0, 1);
53027        let r = _mm_mask_srl_epi32(a, 0, a, count);
53028        assert_eq_m128i(r, a);
53029        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
53030        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53031        assert_eq_m128i(r, e);
53032    }
53033
53034    #[simd_test(enable = "avx512f,avx512vl")]
53035    unsafe fn test_mm_maskz_srl_epi32() {
53036        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53037        let count = _mm_set_epi32(0, 0, 0, 1);
53038        let r = _mm_maskz_srl_epi32(0, a, count);
53039        assert_eq_m128i(r, _mm_setzero_si128());
53040        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
53041        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53042        assert_eq_m128i(r, e);
53043    }
53044
53045    #[simd_test(enable = "avx512f")]
53046    unsafe fn test_mm512_sra_epi32() {
53047        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53048        let count = _mm_set_epi32(1, 0, 0, 2);
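        // The shift amount is the low 64 bits of `count` (2); the `1` in the top
        // element is ignored. The arithmetic shift preserves the sign:
        // -8 >> 2 == -2 and -15 >> 2 == -4.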
53049        let r = _mm512_sra_epi32(a, count);
53050        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53051        assert_eq_m512i(r, e);
53052    }
53053
53054    #[simd_test(enable = "avx512f")]
53055    unsafe fn test_mm512_mask_sra_epi32() {
53056        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53057        let count = _mm_set_epi32(0, 0, 0, 2);
53058        let r = _mm512_mask_sra_epi32(a, 0, a, count);
53059        assert_eq_m512i(r, a);
53060        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
53061        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
53062        assert_eq_m512i(r, e);
53063    }
53064
53065    #[simd_test(enable = "avx512f")]
53066    unsafe fn test_mm512_maskz_sra_epi32() {
53067        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53068        let count = _mm_set_epi32(2, 0, 0, 2);
53069        let r = _mm512_maskz_sra_epi32(0, a, count);
53070        assert_eq_m512i(r, _mm512_setzero_si512());
53071        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
53072        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53073        assert_eq_m512i(r, e);
53074    }
53075
53076    #[simd_test(enable = "avx512f,avx512vl")]
53077    unsafe fn test_mm256_mask_sra_epi32() {
53078        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53079        let count = _mm_set_epi32(0, 0, 0, 1);
53080        let r = _mm256_mask_sra_epi32(a, 0, a, count);
53081        assert_eq_m256i(r, a);
53082        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
53083        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53084        assert_eq_m256i(r, e);
53085    }
53086
53087    #[simd_test(enable = "avx512f,avx512vl")]
53088    unsafe fn test_mm256_maskz_sra_epi32() {
53089        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53090        let count = _mm_set_epi32(0, 0, 0, 1);
53091        let r = _mm256_maskz_sra_epi32(0, a, count);
53092        assert_eq_m256i(r, _mm256_setzero_si256());
53093        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
53094        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53095        assert_eq_m256i(r, e);
53096    }
53097
53098    #[simd_test(enable = "avx512f,avx512vl")]
53099    unsafe fn test_mm_mask_sra_epi32() {
53100        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53101        let count = _mm_set_epi32(0, 0, 0, 1);
53102        let r = _mm_mask_sra_epi32(a, 0, a, count);
53103        assert_eq_m128i(r, a);
53104        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
53105        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53106        assert_eq_m128i(r, e);
53107    }
53108
53109    #[simd_test(enable = "avx512f,avx512vl")]
53110    unsafe fn test_mm_maskz_sra_epi32() {
53111        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53112        let count = _mm_set_epi32(0, 0, 0, 1);
53113        let r = _mm_maskz_sra_epi32(0, a, count);
53114        assert_eq_m128i(r, _mm_setzero_si128());
53115        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
53116        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53117        assert_eq_m128i(r, e);
53118    }
53119
53120    #[simd_test(enable = "avx512f")]
53121    unsafe fn test_mm512_srav_epi32() {
53122        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53123        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53124        let r = _mm512_srav_epi32(a, count);
53125        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53126        assert_eq_m512i(r, e);
53127    }
53128
53129    #[simd_test(enable = "avx512f")]
53130    unsafe fn test_mm512_mask_srav_epi32() {
53131        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53132        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53133        let r = _mm512_mask_srav_epi32(a, 0, a, count);
53134        assert_eq_m512i(r, a);
53135        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
53136        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
53137        assert_eq_m512i(r, e);
53138    }
53139
53140    #[simd_test(enable = "avx512f")]
53141    unsafe fn test_mm512_maskz_srav_epi32() {
53142        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53143        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
53144        let r = _mm512_maskz_srav_epi32(0, a, count);
53145        assert_eq_m512i(r, _mm512_setzero_si512());
53146        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
53147        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53148        assert_eq_m512i(r, e);
53149    }
53150
53151    #[simd_test(enable = "avx512f,avx512vl")]
53152    unsafe fn test_mm256_mask_srav_epi32() {
53153        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53154        let count = _mm256_set1_epi32(1);
53155        let r = _mm256_mask_srav_epi32(a, 0, a, count);
53156        assert_eq_m256i(r, a);
53157        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
53158        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53159        assert_eq_m256i(r, e);
53160    }
53161
53162    #[simd_test(enable = "avx512f,avx512vl")]
53163    unsafe fn test_mm256_maskz_srav_epi32() {
53164        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53165        let count = _mm256_set1_epi32(1);
53166        let r = _mm256_maskz_srav_epi32(0, a, count);
53167        assert_eq_m256i(r, _mm256_setzero_si256());
53168        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
53169        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53170        assert_eq_m256i(r, e);
53171    }
53172
53173    #[simd_test(enable = "avx512f,avx512vl")]
53174    unsafe fn test_mm_mask_srav_epi32() {
53175        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53176        let count = _mm_set1_epi32(1);
53177        let r = _mm_mask_srav_epi32(a, 0, a, count);
53178        assert_eq_m128i(r, a);
53179        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
53180        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53181        assert_eq_m128i(r, e);
53182    }
53183
53184    #[simd_test(enable = "avx512f,avx512vl")]
53185    unsafe fn test_mm_maskz_srav_epi32() {
53186        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53187        let count = _mm_set1_epi32(1);
53188        let r = _mm_maskz_srav_epi32(0, a, count);
53189        assert_eq_m128i(r, _mm_setzero_si128());
53190        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
53191        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53192        assert_eq_m128i(r, e);
53193    }
53194
53195    #[simd_test(enable = "avx512f")]
53196    unsafe fn test_mm512_srai_epi32() {
53197        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
53198        let r = _mm512_srai_epi32::<2>(a);
53199        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
53200        assert_eq_m512i(r, e);
53201    }
53202
53203    #[simd_test(enable = "avx512f")]
53204    unsafe fn test_mm512_mask_srai_epi32() {
53205        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
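        // 15 >> 2 == 3 and -15 >> 2 == -4: the arithmetic shift rounds toward
        // negative infinity.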
53206        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
53207        assert_eq_m512i(r, a);
53208        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
53209        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53210        assert_eq_m512i(r, e);
53211    }
53212
53213    #[simd_test(enable = "avx512f")]
53214    unsafe fn test_mm512_maskz_srai_epi32() {
53215        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53216        let r = _mm512_maskz_srai_epi32::<2>(0, a);
53217        assert_eq_m512i(r, _mm512_setzero_si512());
53218        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
53219        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53220        assert_eq_m512i(r, e);
53221    }
53222
53223    #[simd_test(enable = "avx512f,avx512vl")]
53224    unsafe fn test_mm256_mask_srai_epi32() {
53225        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53226        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
53227        assert_eq_m256i(r, a);
53228        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
53229        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53230        assert_eq_m256i(r, e);
53231    }
53232
53233    #[simd_test(enable = "avx512f,avx512vl")]
53234    unsafe fn test_mm256_maskz_srai_epi32() {
53235        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53236        let r = _mm256_maskz_srai_epi32::<1>(0, a);
53237        assert_eq_m256i(r, _mm256_setzero_si256());
53238        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
53239        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53240        assert_eq_m256i(r, e);
53241    }
53242
53243    #[simd_test(enable = "avx512f,avx512vl")]
53244    unsafe fn test_mm_mask_srai_epi32() {
53245        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53246        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
53247        assert_eq_m128i(r, a);
53248        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
53249        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53250        assert_eq_m128i(r, e);
53251    }
53252
53253    #[simd_test(enable = "avx512f,avx512vl")]
53254    unsafe fn test_mm_maskz_srai_epi32() {
53255        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53256        let r = _mm_maskz_srai_epi32::<1>(0, a);
53257        assert_eq_m128i(r, _mm_setzero_si128());
53258        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
53259        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53260        assert_eq_m128i(r, e);
53261    }
53262
53263    #[simd_test(enable = "avx512f")]
53264    unsafe fn test_mm512_permute_ps() {
53265        let a = _mm512_setr_ps(
53266            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53267        );
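        // Every 2-bit field of the immediate is 0b11, so element 3 of each 128-bit
        // lane is broadcast across that lane.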
53268        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
53269        let e = _mm512_setr_ps(
53270            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53271        );
53272        assert_eq_m512(r, e);
53273    }
53274
53275    #[simd_test(enable = "avx512f")]
53276    unsafe fn test_mm512_mask_permute_ps() {
53277        let a = _mm512_setr_ps(
53278            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53279        );
53280        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53281        assert_eq_m512(r, a);
53282        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
53283        let e = _mm512_setr_ps(
53284            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53285        );
53286        assert_eq_m512(r, e);
53287    }
53288
53289    #[simd_test(enable = "avx512f")]
53290    unsafe fn test_mm512_maskz_permute_ps() {
53291        let a = _mm512_setr_ps(
53292            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53293        );
53294        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
53295        assert_eq_m512(r, _mm512_setzero_ps());
53296        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
53297        let e = _mm512_setr_ps(
53298            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53299        );
53300        assert_eq_m512(r, e);
53301    }
53302
53303    #[simd_test(enable = "avx512f,avx512vl")]
53304    unsafe fn test_mm256_mask_permute_ps() {
53305        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53306        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53307        assert_eq_m256(r, a);
53308        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
53309        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53310        assert_eq_m256(r, e);
53311    }
53312
53313    #[simd_test(enable = "avx512f,avx512vl")]
53314    unsafe fn test_mm256_maskz_permute_ps() {
53315        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53316        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
53317        assert_eq_m256(r, _mm256_setzero_ps());
53318        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
53319        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53320        assert_eq_m256(r, e);
53321    }
53322
53323    #[simd_test(enable = "avx512f,avx512vl")]
53324    unsafe fn test_mm_mask_permute_ps() {
53325        let a = _mm_set_ps(0., 1., 2., 3.);
53326        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53327        assert_eq_m128(r, a);
53328        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
53329        let e = _mm_set_ps(0., 0., 0., 0.);
53330        assert_eq_m128(r, e);
53331    }
53332
53333    #[simd_test(enable = "avx512f,avx512vl")]
53334    unsafe fn test_mm_maskz_permute_ps() {
53335        let a = _mm_set_ps(0., 1., 2., 3.);
53336        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
53337        assert_eq_m128(r, _mm_setzero_ps());
53338        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
53339        let e = _mm_set_ps(0., 0., 0., 0.);
53340        assert_eq_m128(r, e);
53341    }
53342
53343    #[simd_test(enable = "avx512f")]
53344    unsafe fn test_mm512_permutevar_epi32() {
53345        let idx = _mm512_set1_epi32(1);
53346        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
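        // Despite the legacy name, this intrinsic permutes across the whole 512-bit
        // vector (vpermd): index 1 selects element 1 of `a`, the next-to-last argument
        // of `_mm512_set_epi32`, i.e. 14.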
53347        let r = _mm512_permutevar_epi32(idx, a);
53348        let e = _mm512_set1_epi32(14);
53349        assert_eq_m512i(r, e);
53350    }
53351
53352    #[simd_test(enable = "avx512f")]
53353    unsafe fn test_mm512_mask_permutevar_epi32() {
53354        let idx = _mm512_set1_epi32(1);
53355        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53356        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
53357        assert_eq_m512i(r, a);
53358        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
53359        let e = _mm512_set1_epi32(14);
53360        assert_eq_m512i(r, e);
53361    }
53362
53363    #[simd_test(enable = "avx512f")]
53364    unsafe fn test_mm512_permutevar_ps() {
53365        let a = _mm512_set_ps(
53366            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53367        );
53368        let b = _mm512_set1_epi32(0b01);
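        // permutevar_ps (vpermilps) shuffles within each 128-bit lane, using only the
        // low two bits of each control element; index 1 picks that lane's element 1.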
53369        let r = _mm512_permutevar_ps(a, b);
53370        let e = _mm512_set_ps(
53371            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53372        );
53373        assert_eq_m512(r, e);
53374    }
53375
53376    #[simd_test(enable = "avx512f")]
53377    unsafe fn test_mm512_mask_permutevar_ps() {
53378        let a = _mm512_set_ps(
53379            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53380        );
53381        let b = _mm512_set1_epi32(0b01);
53382        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
53383        assert_eq_m512(r, a);
53384        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
53385        let e = _mm512_set_ps(
53386            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53387        );
53388        assert_eq_m512(r, e);
53389    }
53390
53391    #[simd_test(enable = "avx512f")]
53392    unsafe fn test_mm512_maskz_permutevar_ps() {
53393        let a = _mm512_set_ps(
53394            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53395        );
53396        let b = _mm512_set1_epi32(0b01);
53397        let r = _mm512_maskz_permutevar_ps(0, a, b);
53398        assert_eq_m512(r, _mm512_setzero_ps());
53399        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
53400        let e = _mm512_set_ps(
53401            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
53402        );
53403        assert_eq_m512(r, e);
53404    }
53405
53406    #[simd_test(enable = "avx512f,avx512vl")]
53407    unsafe fn test_mm256_mask_permutevar_ps() {
53408        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53409        let b = _mm256_set1_epi32(0b01);
53410        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
53411        assert_eq_m256(r, a);
53412        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
53413        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53414        assert_eq_m256(r, e);
53415    }
53416
53417    #[simd_test(enable = "avx512f,avx512vl")]
53418    unsafe fn test_mm256_maskz_permutevar_ps() {
53419        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53420        let b = _mm256_set1_epi32(0b01);
53421        let r = _mm256_maskz_permutevar_ps(0, a, b);
53422        assert_eq_m256(r, _mm256_setzero_ps());
53423        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
53424        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53425        assert_eq_m256(r, e);
53426    }
53427
53428    #[simd_test(enable = "avx512f,avx512vl")]
53429    unsafe fn test_mm_mask_permutevar_ps() {
53430        let a = _mm_set_ps(0., 1., 2., 3.);
53431        let b = _mm_set1_epi32(0b01);
53432        let r = _mm_mask_permutevar_ps(a, 0, a, b);
53433        assert_eq_m128(r, a);
53434        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
53435        let e = _mm_set_ps(2., 2., 2., 2.);
53436        assert_eq_m128(r, e);
53437    }
53438
53439    #[simd_test(enable = "avx512f,avx512vl")]
53440    unsafe fn test_mm_maskz_permutevar_ps() {
53441        let a = _mm_set_ps(0., 1., 2., 3.);
53442        let b = _mm_set1_epi32(0b01);
53443        let r = _mm_maskz_permutevar_ps(0, a, b);
53444        assert_eq_m128(r, _mm_setzero_ps());
53445        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
53446        let e = _mm_set_ps(2., 2., 2., 2.);
53447        assert_eq_m128(r, e);
53448    }
53449
53450    #[simd_test(enable = "avx512f")]
53451    unsafe fn test_mm512_permutexvar_epi32() {
53452        let idx = _mm512_set1_epi32(1);
53453        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53454        let r = _mm512_permutexvar_epi32(idx, a);
53455        let e = _mm512_set1_epi32(14);
53456        assert_eq_m512i(r, e);
53457    }
53458
53459    #[simd_test(enable = "avx512f")]
53460    unsafe fn test_mm512_mask_permutexvar_epi32() {
53461        let idx = _mm512_set1_epi32(1);
53462        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53463        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
53464        assert_eq_m512i(r, a);
53465        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
53466        let e = _mm512_set1_epi32(14);
53467        assert_eq_m512i(r, e);
53468    }
53469
53470    #[simd_test(enable = "avx512f")]
53471    unsafe fn test_mm512_maskz_permutexvar_epi32() {
53472        let idx = _mm512_set1_epi32(1);
53473        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53474        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
53475        assert_eq_m512i(r, _mm512_setzero_si512());
53476        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
53477        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
53478        assert_eq_m512i(r, e);
53479    }
53480
53481    #[simd_test(enable = "avx512f,avx512vl")]
53482    unsafe fn test_mm256_permutexvar_epi32() {
53483        let idx = _mm256_set1_epi32(1);
53484        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53485        let r = _mm256_permutexvar_epi32(idx, a);
53486        let e = _mm256_set1_epi32(6);
53487        assert_eq_m256i(r, e);
53488    }
53489
53490    #[simd_test(enable = "avx512f,avx512vl")]
53491    unsafe fn test_mm256_mask_permutexvar_epi32() {
53492        let idx = _mm256_set1_epi32(1);
53493        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53494        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
53495        assert_eq_m256i(r, a);
53496        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
53497        let e = _mm256_set1_epi32(6);
53498        assert_eq_m256i(r, e);
53499    }
53500
53501    #[simd_test(enable = "avx512f,avx512vl")]
53502    unsafe fn test_mm256_maskz_permutexvar_epi32() {
53503        let idx = _mm256_set1_epi32(1);
53504        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53505        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
53506        assert_eq_m256i(r, _mm256_setzero_si256());
53507        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
53508        let e = _mm256_set1_epi32(6);
53509        assert_eq_m256i(r, e);
53510    }
53511
53512    #[simd_test(enable = "avx512f")]
53513    unsafe fn test_mm512_permutexvar_ps() {
53514        let idx = _mm512_set1_epi32(1);
53515        let a = _mm512_set_ps(
53516            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53517        );
53518        let r = _mm512_permutexvar_ps(idx, a);
53519        let e = _mm512_set1_ps(14.);
53520        assert_eq_m512(r, e);
53521    }
53522
53523    #[simd_test(enable = "avx512f")]
53524    unsafe fn test_mm512_mask_permutexvar_ps() {
53525        let idx = _mm512_set1_epi32(1);
53526        let a = _mm512_set_ps(
53527            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53528        );
53529        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
53530        assert_eq_m512(r, a);
53531        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
53532        let e = _mm512_set1_ps(14.);
53533        assert_eq_m512(r, e);
53534    }
53535
53536    #[simd_test(enable = "avx512f")]
53537    unsafe fn test_mm512_maskz_permutexvar_ps() {
53538        let idx = _mm512_set1_epi32(1);
53539        let a = _mm512_set_ps(
53540            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53541        );
53542        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
53543        assert_eq_m512(r, _mm512_setzero_ps());
53544        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
53545        let e = _mm512_set_ps(
53546            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
53547        );
53548        assert_eq_m512(r, e);
53549    }
53550
53551    #[simd_test(enable = "avx512f,avx512vl")]
53552    unsafe fn test_mm256_permutexvar_ps() {
53553        let idx = _mm256_set1_epi32(1);
53554        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53555        let r = _mm256_permutexvar_ps(idx, a);
53556        let e = _mm256_set1_ps(6.);
53557        assert_eq_m256(r, e);
53558    }
53559
53560    #[simd_test(enable = "avx512f,avx512vl")]
53561    unsafe fn test_mm256_mask_permutexvar_ps() {
53562        let idx = _mm256_set1_epi32(1);
53563        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53564        let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
53565        assert_eq_m256(r, a);
53566        let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
53567        let e = _mm256_set1_ps(6.);
53568        assert_eq_m256(r, e);
53569    }
53570
53571    #[simd_test(enable = "avx512f,avx512vl")]
53572    unsafe fn test_mm256_maskz_permutexvar_ps() {
53573        let idx = _mm256_set1_epi32(1);
53574        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53575        let r = _mm256_maskz_permutexvar_ps(0, idx, a);
53576        assert_eq_m256(r, _mm256_setzero_ps());
53577        let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
53578        let e = _mm256_set1_ps(6.);
53579        assert_eq_m256(r, e);
53580    }
53581
53582    #[simd_test(enable = "avx512f")]
53583    unsafe fn test_mm512_permutex2var_epi32() {
53584        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
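        // Each index uses bits 3:0 to pick an element and bit 4 to pick the source:
        // 0 selects from `a`, 1 selects from `b`, so `1 << 4` reads element 0 of `b` (100).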
53585        #[rustfmt::skip]
53586        let idx = _mm512_set_epi32(
53587            1, 1 << 4, 2, 1 << 4,
53588            3, 1 << 4, 4, 1 << 4,
53589            5, 1 << 4, 6, 1 << 4,
53590            7, 1 << 4, 8, 1 << 4,
53591        );
53592        let b = _mm512_set1_epi32(100);
53593        let r = _mm512_permutex2var_epi32(a, idx, b);
53594        let e = _mm512_set_epi32(
53595            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53596        );
53597        assert_eq_m512i(r, e);
53598    }
53599
53600    #[simd_test(enable = "avx512f")]
53601    unsafe fn test_mm512_mask_permutex2var_epi32() {
53602        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53603        #[rustfmt::skip]
53604        let idx = _mm512_set_epi32(
53605            1, 1 << 4, 2, 1 << 4,
53606            3, 1 << 4, 4, 1 << 4,
53607            5, 1 << 4, 6, 1 << 4,
53608            7, 1 << 4, 8, 1 << 4,
53609        );
53610        let b = _mm512_set1_epi32(100);
53611        let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
53612        assert_eq_m512i(r, a);
53613        let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
53614        let e = _mm512_set_epi32(
53615            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53616        );
53617        assert_eq_m512i(r, e);
53618    }
53619
53620    #[simd_test(enable = "avx512f")]
53621    unsafe fn test_mm512_maskz_permutex2var_epi32() {
53622        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53623        #[rustfmt::skip]
53624        let idx = _mm512_set_epi32(
53625            1, 1 << 4, 2, 1 << 4,
53626            3, 1 << 4, 4, 1 << 4,
53627            5, 1 << 4, 6, 1 << 4,
53628            7, 1 << 4, 8, 1 << 4,
53629        );
53630        let b = _mm512_set1_epi32(100);
53631        let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
53632        assert_eq_m512i(r, _mm512_setzero_si512());
53633        let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
53634        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
53635        assert_eq_m512i(r, e);
53636    }
53637
53638    #[simd_test(enable = "avx512f")]
53639    unsafe fn test_mm512_mask2_permutex2var_epi32() {
53640        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53641        #[rustfmt::skip]
53642        let idx = _mm512_set_epi32(
53643            1000, 1 << 4, 2000, 1 << 4,
53644            3000, 1 << 4, 4000, 1 << 4,
53645            5, 1 << 4, 6, 1 << 4,
53646            7, 1 << 4, 8, 1 << 4,
53647        );
53648        let b = _mm512_set1_epi32(100);
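        // The `mask2` variant copies elements from `idx` (not from `a`) wherever the
        // corresponding mask bit is clear, so an all-zero mask returns `idx` unchanged.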
53649        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
53650        assert_eq_m512i(r, idx);
53651        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
53652        #[rustfmt::skip]
53653        let e = _mm512_set_epi32(
53654            1000, 1 << 4, 2000, 1 << 4,
53655            3000, 1 << 4, 4000, 1 << 4,
53656            10, 100, 9, 100,
53657            8, 100, 7, 100,
53658        );
53659        assert_eq_m512i(r, e);
53660    }
53661
53662    #[simd_test(enable = "avx512f,avx512vl")]
53663    unsafe fn test_mm256_permutex2var_epi32() {
53664        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53665        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53666        let b = _mm256_set1_epi32(100);
53667        let r = _mm256_permutex2var_epi32(a, idx, b);
53668        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53669        assert_eq_m256i(r, e);
53670    }
53671
53672    #[simd_test(enable = "avx512f,avx512vl")]
53673    unsafe fn test_mm256_mask_permutex2var_epi32() {
53674        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53675        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53676        let b = _mm256_set1_epi32(100);
53677        let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
53678        assert_eq_m256i(r, a);
53679        let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
53680        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53681        assert_eq_m256i(r, e);
53682    }
53683
53684    #[simd_test(enable = "avx512f,avx512vl")]
53685    unsafe fn test_mm256_maskz_permutex2var_epi32() {
53686        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53687        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53688        let b = _mm256_set1_epi32(100);
53689        let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
53690        assert_eq_m256i(r, _mm256_setzero_si256());
53691        let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
53692        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53693        assert_eq_m256i(r, e);
53694    }
53695
53696    #[simd_test(enable = "avx512f,avx512vl")]
53697    unsafe fn test_mm256_mask2_permutex2var_epi32() {
53698        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53699        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53700        let b = _mm256_set1_epi32(100);
53701        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
53702        assert_eq_m256i(r, idx);
53703        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
53704        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53705        assert_eq_m256i(r, e);
53706    }
53707
53708    #[simd_test(enable = "avx512f,avx512vl")]
53709    unsafe fn test_mm_permutex2var_epi32() {
53710        let a = _mm_set_epi32(0, 1, 2, 3);
53711        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53712        let b = _mm_set1_epi32(100);
53713        let r = _mm_permutex2var_epi32(a, idx, b);
53714        let e = _mm_set_epi32(2, 100, 1, 100);
53715        assert_eq_m128i(r, e);
53716    }
53717
53718    #[simd_test(enable = "avx512f,avx512vl")]
53719    unsafe fn test_mm_mask_permutex2var_epi32() {
53720        let a = _mm_set_epi32(0, 1, 2, 3);
53721        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53722        let b = _mm_set1_epi32(100);
53723        let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
53724        assert_eq_m128i(r, a);
53725        let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
53726        let e = _mm_set_epi32(2, 100, 1, 100);
53727        assert_eq_m128i(r, e);
53728    }
53729
53730    #[simd_test(enable = "avx512f,avx512vl")]
53731    unsafe fn test_mm_maskz_permutex2var_epi32() {
53732        let a = _mm_set_epi32(0, 1, 2, 3);
53733        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53734        let b = _mm_set1_epi32(100);
53735        let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
53736        assert_eq_m128i(r, _mm_setzero_si128());
53737        let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
53738        let e = _mm_set_epi32(2, 100, 1, 100);
53739        assert_eq_m128i(r, e);
53740    }
53741
53742    #[simd_test(enable = "avx512f,avx512vl")]
53743    unsafe fn test_mm_mask2_permutex2var_epi32() {
53744        let a = _mm_set_epi32(0, 1, 2, 3);
53745        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53746        let b = _mm_set1_epi32(100);
53747        let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
53748        assert_eq_m128i(r, idx);
53749        let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
53750        let e = _mm_set_epi32(2, 100, 1, 100);
53751        assert_eq_m128i(r, e);
53752    }
53753
53754    #[simd_test(enable = "avx512f")]
53755    unsafe fn test_mm512_permutex2var_ps() {
53756        let a = _mm512_set_ps(
53757            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53758        );
53759        #[rustfmt::skip]
53760        let idx = _mm512_set_epi32(
53761            1, 1 << 4, 2, 1 << 4,
53762            3, 1 << 4, 4, 1 << 4,
53763            5, 1 << 4, 6, 1 << 4,
53764            7, 1 << 4, 8, 1 << 4,
53765        );
53766        let b = _mm512_set1_ps(100.);
53767        let r = _mm512_permutex2var_ps(a, idx, b);
53768        let e = _mm512_set_ps(
53769            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53770        );
53771        assert_eq_m512(r, e);
53772    }
53773
53774    #[simd_test(enable = "avx512f")]
53775    unsafe fn test_mm512_mask_permutex2var_ps() {
53776        let a = _mm512_set_ps(
53777            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53778        );
53779        #[rustfmt::skip]
53780        let idx = _mm512_set_epi32(
53781            1, 1 << 4, 2, 1 << 4,
53782            3, 1 << 4, 4, 1 << 4,
53783            5, 1 << 4, 6, 1 << 4,
53784            7, 1 << 4, 8, 1 << 4,
53785        );
53786        let b = _mm512_set1_ps(100.);
53787        let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
53788        assert_eq_m512(r, a);
53789        let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
53790        let e = _mm512_set_ps(
53791            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53792        );
53793        assert_eq_m512(r, e);
53794    }
53795
53796    #[simd_test(enable = "avx512f")]
53797    unsafe fn test_mm512_maskz_permutex2var_ps() {
53798        let a = _mm512_set_ps(
53799            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53800        );
53801        #[rustfmt::skip]
53802        let idx = _mm512_set_epi32(
53803            1, 1 << 4, 2, 1 << 4,
53804            3, 1 << 4, 4, 1 << 4,
53805            5, 1 << 4, 6, 1 << 4,
53806            7, 1 << 4, 8, 1 << 4,
53807        );
53808        let b = _mm512_set1_ps(100.);
53809        let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
53810        assert_eq_m512(r, _mm512_setzero_ps());
53811        let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
53812        let e = _mm512_set_ps(
53813            0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
53814        );
53815        assert_eq_m512(r, e);
53816    }
53817
53818    #[simd_test(enable = "avx512f")]
53819    unsafe fn test_mm512_mask2_permutex2var_ps() {
53820        let a = _mm512_set_ps(
53821            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53822        );
53823        #[rustfmt::skip]
53824        let idx = _mm512_set_epi32(
53825            1, 1 << 4, 2, 1 << 4,
53826            3, 1 << 4, 4, 1 << 4,
53827            5, 1 << 4, 6, 1 << 4,
53828            7, 1 << 4, 8, 1 << 4,
53829        );
53830        let b = _mm512_set1_ps(100.);
53831        let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
53832        assert_eq_m512(r, _mm512_castsi512_ps(idx));
53833        let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
53834        let e = _mm512_set_ps(
53835            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53836        );
53837        assert_eq_m512(r, e);
53838    }
53839
53840    #[simd_test(enable = "avx512f,avx512vl")]
53841    unsafe fn test_mm256_permutex2var_ps() {
53842        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53843        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53844        let b = _mm256_set1_ps(100.);
53845        let r = _mm256_permutex2var_ps(a, idx, b);
53846        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53847        assert_eq_m256(r, e);
53848    }
53849
53850    #[simd_test(enable = "avx512f,avx512vl")]
53851    unsafe fn test_mm256_mask_permutex2var_ps() {
53852        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53853        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53854        let b = _mm256_set1_ps(100.);
53855        let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
53856        assert_eq_m256(r, a);
53857        let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
53858        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53859        assert_eq_m256(r, e);
53860    }
53861
53862    #[simd_test(enable = "avx512f,avx512vl")]
53863    unsafe fn test_mm256_maskz_permutex2var_ps() {
53864        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53865        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53866        let b = _mm256_set1_ps(100.);
53867        let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
53868        assert_eq_m256(r, _mm256_setzero_ps());
53869        let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
53870        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53871        assert_eq_m256(r, e);
53872    }
53873
53874    #[simd_test(enable = "avx512f,avx512vl")]
53875    unsafe fn test_mm256_mask2_permutex2var_ps() {
53876        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53877        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53878        let b = _mm256_set1_ps(100.);
53879        let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
53880        assert_eq_m256(r, _mm256_castsi256_ps(idx));
53881        let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
53882        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53883        assert_eq_m256(r, e);
53884    }
53885
53886    #[simd_test(enable = "avx512f,avx512vl")]
53887    unsafe fn test_mm_permutex2var_ps() {
53888        let a = _mm_set_ps(0., 1., 2., 3.);
53889        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53890        let b = _mm_set1_ps(100.);
53891        let r = _mm_permutex2var_ps(a, idx, b);
53892        let e = _mm_set_ps(2., 100., 1., 100.);
53893        assert_eq_m128(r, e);
53894    }
53895
53896    #[simd_test(enable = "avx512f,avx512vl")]
53897    unsafe fn test_mm_mask_permutex2var_ps() {
53898        let a = _mm_set_ps(0., 1., 2., 3.);
53899        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53900        let b = _mm_set1_ps(100.);
53901        let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
53902        assert_eq_m128(r, a);
53903        let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
53904        let e = _mm_set_ps(2., 100., 1., 100.);
53905        assert_eq_m128(r, e);
53906    }
53907
53908    #[simd_test(enable = "avx512f,avx512vl")]
53909    unsafe fn test_mm_maskz_permutex2var_ps() {
53910        let a = _mm_set_ps(0., 1., 2., 3.);
53911        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53912        let b = _mm_set1_ps(100.);
53913        let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
53914        assert_eq_m128(r, _mm_setzero_ps());
53915        let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
53916        let e = _mm_set_ps(2., 100., 1., 100.);
53917        assert_eq_m128(r, e);
53918    }
53919
53920    #[simd_test(enable = "avx512f,avx512vl")]
53921    unsafe fn test_mm_mask2_permutex2var_ps() {
53922        let a = _mm_set_ps(0., 1., 2., 3.);
53923        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53924        let b = _mm_set1_ps(100.);
53925        let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
53926        assert_eq_m128(r, _mm_castsi128_ps(idx));
53927        let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
53928        let e = _mm_set_ps(2., 100., 1., 100.);
53929        assert_eq_m128(r, e);
53930    }
53931
53932    #[simd_test(enable = "avx512f")]
53933    unsafe fn test_mm512_shuffle_epi32() {
53934        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
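        // `_MM_PERM_AADD` (0b00_00_11_11) writes element 3 of each 128-bit lane to the
        // two low positions and element 0 to the two high positions of that lane.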
53935        let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
53936        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53937        assert_eq_m512i(r, e);
53938    }
53939
53940    #[simd_test(enable = "avx512f")]
53941    unsafe fn test_mm512_mask_shuffle_epi32() {
53942        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53943        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53944        assert_eq_m512i(r, a);
53945        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
53946        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53947        assert_eq_m512i(r, e);
53948    }
53949
53950    #[simd_test(enable = "avx512f")]
53951    unsafe fn test_mm512_maskz_shuffle_epi32() {
53952        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53953        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53954        assert_eq_m512i(r, _mm512_setzero_si512());
53955        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
53956        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
53957        assert_eq_m512i(r, e);
53958    }
53959
53960    #[simd_test(enable = "avx512f,avx512vl")]
53961    unsafe fn test_mm256_mask_shuffle_epi32() {
53962        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53963        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53964        assert_eq_m256i(r, a);
53965        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
53966        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53967        assert_eq_m256i(r, e);
53968    }
53969
53970    #[simd_test(enable = "avx512f,avx512vl")]
53971    unsafe fn test_mm256_maskz_shuffle_epi32() {
53972        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53973        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53974        assert_eq_m256i(r, _mm256_setzero_si256());
53975        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
53976        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53977        assert_eq_m256i(r, e);
53978    }
53979
53980    #[simd_test(enable = "avx512f,avx512vl")]
53981    unsafe fn test_mm_mask_shuffle_epi32() {
53982        let a = _mm_set_epi32(1, 4, 5, 8);
53983        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53984        assert_eq_m128i(r, a);
53985        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
53986        let e = _mm_set_epi32(8, 8, 1, 1);
53987        assert_eq_m128i(r, e);
53988    }
53989
53990    #[simd_test(enable = "avx512f,avx512vl")]
53991    unsafe fn test_mm_maskz_shuffle_epi32() {
53992        let a = _mm_set_epi32(1, 4, 5, 8);
53993        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53994        assert_eq_m128i(r, _mm_setzero_si128());
53995        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
53996        let e = _mm_set_epi32(8, 8, 1, 1);
53997        assert_eq_m128i(r, e);
53998    }
53999
54000    #[simd_test(enable = "avx512f")]
54001    unsafe fn test_mm512_shuffle_ps() {
54002        let a = _mm512_setr_ps(
54003            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54004        );
54005        let b = _mm512_setr_ps(
54006            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54007        );
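        // Within each 128-bit lane, the control 0b00_00_11_11 takes element 3 of `a`
        // for the two low results and element 0 of `b` for the two high results.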
54008        let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
54009        let e = _mm512_setr_ps(
54010            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
54011        );
54012        assert_eq_m512(r, e);
54013    }
54014
54015    #[simd_test(enable = "avx512f")]
54016    unsafe fn test_mm512_mask_shuffle_ps() {
54017        let a = _mm512_setr_ps(
54018            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54019        );
54020        let b = _mm512_setr_ps(
54021            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54022        );
54023        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
54024        assert_eq_m512(r, a);
54025        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
54026        let e = _mm512_setr_ps(
54027            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
54028        );
54029        assert_eq_m512(r, e);
54030    }
54031
54032    #[simd_test(enable = "avx512f")]
54033    unsafe fn test_mm512_maskz_shuffle_ps() {
54034        let a = _mm512_setr_ps(
54035            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54036        );
54037        let b = _mm512_setr_ps(
54038            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54039        );
54040        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
54041        assert_eq_m512(r, _mm512_setzero_ps());
54042        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
54043        let e = _mm512_setr_ps(
54044            8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
54045        );
54046        assert_eq_m512(r, e);
54047    }
54048
54049    #[simd_test(enable = "avx512f,avx512vl")]
54050    unsafe fn test_mm256_mask_shuffle_ps() {
54051        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54052        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54053        let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
54054        assert_eq_m256(r, a);
54055        let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
54056        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
54057        assert_eq_m256(r, e);
54058    }
54059
54060    #[simd_test(enable = "avx512f,avx512vl")]
54061    unsafe fn test_mm256_maskz_shuffle_ps() {
54062        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54063        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54064        let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54065        assert_eq_m256(r, _mm256_setzero_ps());
54066        let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
54067        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
54068        assert_eq_m256(r, e);
54069    }
54070
54071    #[simd_test(enable = "avx512f,avx512vl")]
54072    unsafe fn test_mm_mask_shuffle_ps() {
54073        let a = _mm_set_ps(1., 4., 5., 8.);
54074        let b = _mm_set_ps(2., 3., 6., 7.);
54075        let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
54076        assert_eq_m128(r, a);
54077        let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
54078        let e = _mm_set_ps(7., 7., 1., 1.);
54079        assert_eq_m128(r, e);
54080    }
54081
54082    #[simd_test(enable = "avx512f,avx512vl")]
54083    unsafe fn test_mm_maskz_shuffle_ps() {
54084        let a = _mm_set_ps(1., 4., 5., 8.);
54085        let b = _mm_set_ps(2., 3., 6., 7.);
54086        let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54087        assert_eq_m128(r, _mm_setzero_ps());
54088        let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
54089        let e = _mm_set_ps(7., 7., 1., 1.);
54090        assert_eq_m128(r, e);
54091    }
54092
54093    #[simd_test(enable = "avx512f")]
54094    unsafe fn test_mm512_shuffle_i32x4() {
54095        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54096        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
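        // shuffle_i32x4 moves whole 128-bit lanes: the two low result lanes come from
        // `a` and the two high ones from `b`; a zero control selects lane 0 every time.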
54097        let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
54098        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54099        assert_eq_m512i(r, e);
54100    }
54101
54102    #[simd_test(enable = "avx512f")]
54103    unsafe fn test_mm512_mask_shuffle_i32x4() {
54104        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54105        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54106        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
54107        assert_eq_m512i(r, a);
54108        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54109        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54110        assert_eq_m512i(r, e);
54111    }
54112
54113    #[simd_test(enable = "avx512f")]
54114    unsafe fn test_mm512_maskz_shuffle_i32x4() {
54115        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54116        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54117        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
54118        assert_eq_m512i(r, _mm512_setzero_si512());
54119        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54120        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54121        assert_eq_m512i(r, e);
54122    }
54123
54124    #[simd_test(enable = "avx512f,avx512vl")]
54125    unsafe fn test_mm256_shuffle_i32x4() {
54126        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54127        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54128        let r = _mm256_shuffle_i32x4::<0b00>(a, b);
54129        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54130        assert_eq_m256i(r, e);
54131    }
54132
54133    #[simd_test(enable = "avx512f,avx512vl")]
54134    unsafe fn test_mm256_mask_shuffle_i32x4() {
54135        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54136        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54137        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
54138        assert_eq_m256i(r, a);
54139        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
54140        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54141        assert_eq_m256i(r, e);
54142    }
54143
54144    #[simd_test(enable = "avx512f,avx512vl")]
54145    unsafe fn test_mm256_maskz_shuffle_i32x4() {
54146        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54147        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54148        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
54149        assert_eq_m256i(r, _mm256_setzero_si256());
54150        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
54151        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54152        assert_eq_m256i(r, e);
54153    }
54154
54155    #[simd_test(enable = "avx512f")]
54156    unsafe fn test_mm512_shuffle_f32x4() {
54157        let a = _mm512_setr_ps(
54158            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54159        );
54160        let b = _mm512_setr_ps(
54161            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54162        );
54163        let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
54164        let e = _mm512_setr_ps(
54165            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54166        );
54167        assert_eq_m512(r, e);
54168    }
54169
54170    #[simd_test(enable = "avx512f")]
54171    unsafe fn test_mm512_mask_shuffle_f32x4() {
54172        let a = _mm512_setr_ps(
54173            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54174        );
54175        let b = _mm512_setr_ps(
54176            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54177        );
54178        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
54179        assert_eq_m512(r, a);
54180        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54181        let e = _mm512_setr_ps(
54182            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54183        );
54184        assert_eq_m512(r, e);
54185    }
54186
54187    #[simd_test(enable = "avx512f")]
54188    unsafe fn test_mm512_maskz_shuffle_f32x4() {
54189        let a = _mm512_setr_ps(
54190            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54191        );
54192        let b = _mm512_setr_ps(
54193            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54194        );
54195        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
54196        assert_eq_m512(r, _mm512_setzero_ps());
54197        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54198        let e = _mm512_setr_ps(
54199            1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54200        );
54201        assert_eq_m512(r, e);
54202    }
54203
54204    #[simd_test(enable = "avx512f,avx512vl")]
54205    unsafe fn test_mm256_shuffle_f32x4() {
54206        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54207        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54208        let r = _mm256_shuffle_f32x4::<0b00>(a, b);
54209        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54210        assert_eq_m256(r, e);
54211    }
54212
54213    #[simd_test(enable = "avx512f,avx512vl")]
54214    unsafe fn test_mm256_mask_shuffle_f32x4() {
54215        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54216        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54217        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
54218        assert_eq_m256(r, a);
54219        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
54220        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54221        assert_eq_m256(r, e);
54222    }
54223
54224    #[simd_test(enable = "avx512f,avx512vl")]
54225    unsafe fn test_mm256_maskz_shuffle_f32x4() {
54226        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54227        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54228        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
54229        assert_eq_m256(r, _mm256_setzero_ps());
54230        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
54231        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54232        assert_eq_m256(r, e);
54233    }
54234
54235    #[simd_test(enable = "avx512f")]
54236    unsafe fn test_mm512_extractf32x4_ps() {
54237        let a = _mm512_setr_ps(
54238            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54239        );
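        // Index 1 extracts the second 128-bit lane, i.e. elements 4..=7.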
54240        let r = _mm512_extractf32x4_ps::<1>(a);
54241        let e = _mm_setr_ps(5., 6., 7., 8.);
54242        assert_eq_m128(r, e);
54243    }
54244
54245    #[simd_test(enable = "avx512f")]
54246    unsafe fn test_mm512_mask_extractf32x4_ps() {
54247        let a = _mm512_setr_ps(
54248            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54249        );
54250        let src = _mm_set1_ps(100.);
54251        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
54252        assert_eq_m128(r, src);
54253        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
54254        let e = _mm_setr_ps(5., 6., 7., 8.);
54255        assert_eq_m128(r, e);
54256    }
54257
54258    #[simd_test(enable = "avx512f")]
54259    unsafe fn test_mm512_maskz_extractf32x4_ps() {
54260        let a = _mm512_setr_ps(
54261            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54262        );
54263        let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
54264        assert_eq_m128(r, _mm_setzero_ps());
54265        let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
54266        let e = _mm_setr_ps(5., 0., 0., 0.);
54267        assert_eq_m128(r, e);
54268    }
54269
54270    #[simd_test(enable = "avx512f,avx512vl")]
54271    unsafe fn test_mm256_extractf32x4_ps() {
54272        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54273        let r = _mm256_extractf32x4_ps::<1>(a);
54274        let e = _mm_set_ps(1., 2., 3., 4.);
54275        assert_eq_m128(r, e);
54276    }
54277
54278    #[simd_test(enable = "avx512f,avx512vl")]
54279    unsafe fn test_mm256_mask_extractf32x4_ps() {
54280        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54281        let src = _mm_set1_ps(100.);
54282        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
54283        assert_eq_m128(r, src);
54284        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
54285        let e = _mm_set_ps(1., 2., 3., 4.);
54286        assert_eq_m128(r, e);
54287    }
54288
54289    #[simd_test(enable = "avx512f,avx512vl")]
54290    unsafe fn test_mm256_maskz_extractf32x4_ps() {
54291        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54292        let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
54293        assert_eq_m128(r, _mm_setzero_ps());
54294        let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
54295        let e = _mm_set_ps(1., 2., 3., 4.);
54296        assert_eq_m128(r, e);
54297    }
54298
54299    #[simd_test(enable = "avx512f")]
54300    unsafe fn test_mm512_extracti32x4_epi32() {
54301        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54302        let r = _mm512_extracti32x4_epi32::<1>(a);
54303        let e = _mm_setr_epi32(5, 6, 7, 8);
54304        assert_eq_m128i(r, e);
54305    }
54306
54307    #[simd_test(enable = "avx512f")]
54308    unsafe fn test_mm512_mask_extracti32x4_epi32() {
54309        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54310        let src = _mm_set1_epi32(100);
54311        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
54312        assert_eq_m128i(r, src);
54313        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
54314        let e = _mm_setr_epi32(5, 6, 7, 8);
54315        assert_eq_m128i(r, e);
54316    }
54317
54318    #[simd_test(enable = "avx512f,avx512vl")]
54319    unsafe fn test_mm512_maskz_extracti32x4_epi32() {
54320        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54321        let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
54322        assert_eq_m128i(r, _mm_setzero_si128());
54323        let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
54324        let e = _mm_setr_epi32(5, 0, 0, 0);
54325        assert_eq_m128i(r, e);
54326    }
54327
54328    #[simd_test(enable = "avx512f,avx512vl")]
54329    unsafe fn test_mm256_extracti32x4_epi32() {
54330        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54331        let r = _mm256_extracti32x4_epi32::<1>(a);
54332        let e = _mm_set_epi32(1, 2, 3, 4);
54333        assert_eq_m128i(r, e);
54334    }
54335
54336    #[simd_test(enable = "avx512f,avx512vl")]
54337    unsafe fn test_mm256_mask_extracti32x4_epi32() {
54338        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54339        let src = _mm_set1_epi32(100);
54340        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
54341        assert_eq_m128i(r, src);
54342        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
54343        let e = _mm_set_epi32(1, 2, 3, 4);
54344        assert_eq_m128i(r, e);
54345    }
54346
54347    #[simd_test(enable = "avx512f,avx512vl")]
54348    unsafe fn test_mm256_maskz_extracti32x4_epi32() {
54349        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54350        let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
54351        assert_eq_m128i(r, _mm_setzero_si128());
54352        let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
54353        let e = _mm_set_epi32(1, 2, 3, 4);
54354        assert_eq_m128i(r, e);
54355    }
54356
54357    #[simd_test(enable = "avx512f")]
54358    unsafe fn test_mm512_moveldup_ps() {
54359        let a = _mm512_setr_ps(
54360            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54361        );
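        // moveldup duplicates each even-indexed element into the odd position above it.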
54362        let r = _mm512_moveldup_ps(a);
54363        let e = _mm512_setr_ps(
54364            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54365        );
54366        assert_eq_m512(r, e);
54367    }
54368
54369    #[simd_test(enable = "avx512f")]
54370    unsafe fn test_mm512_mask_moveldup_ps() {
54371        let a = _mm512_setr_ps(
54372            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54373        );
54374        let r = _mm512_mask_moveldup_ps(a, 0, a);
54375        assert_eq_m512(r, a);
54376        let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
54377        let e = _mm512_setr_ps(
54378            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54379        );
54380        assert_eq_m512(r, e);
54381    }
54382
54383    #[simd_test(enable = "avx512f")]
54384    unsafe fn test_mm512_maskz_moveldup_ps() {
54385        let a = _mm512_setr_ps(
54386            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54387        );
54388        let r = _mm512_maskz_moveldup_ps(0, a);
54389        assert_eq_m512(r, _mm512_setzero_ps());
54390        let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
54391        let e = _mm512_setr_ps(
54392            1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
54393        );
54394        assert_eq_m512(r, e);
54395    }
54396
54397    #[simd_test(enable = "avx512f,avx512vl")]
54398    unsafe fn test_mm256_mask_moveldup_ps() {
54399        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54400        let r = _mm256_mask_moveldup_ps(a, 0, a);
54401        assert_eq_m256(r, a);
54402        let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
54403        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54404        assert_eq_m256(r, e);
54405    }
54406
54407    #[simd_test(enable = "avx512f,avx512vl")]
54408    unsafe fn test_mm256_maskz_moveldup_ps() {
54409        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54410        let r = _mm256_maskz_moveldup_ps(0, a);
54411        assert_eq_m256(r, _mm256_setzero_ps());
54412        let r = _mm256_maskz_moveldup_ps(0b11111111, a);
54413        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54414        assert_eq_m256(r, e);
54415    }
54416
54417    #[simd_test(enable = "avx512f,avx512vl")]
54418    unsafe fn test_mm_mask_moveldup_ps() {
54419        let a = _mm_set_ps(1., 2., 3., 4.);
54420        let r = _mm_mask_moveldup_ps(a, 0, a);
54421        assert_eq_m128(r, a);
54422        let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
54423        let e = _mm_set_ps(2., 2., 4., 4.);
54424        assert_eq_m128(r, e);
54425    }
54426
54427    #[simd_test(enable = "avx512f,avx512vl")]
54428    unsafe fn test_mm_maskz_moveldup_ps() {
54429        let a = _mm_set_ps(1., 2., 3., 4.);
54430        let r = _mm_maskz_moveldup_ps(0, a);
54431        assert_eq_m128(r, _mm_setzero_ps());
54432        let r = _mm_maskz_moveldup_ps(0b00001111, a);
54433        let e = _mm_set_ps(2., 2., 4., 4.);
54434        assert_eq_m128(r, e);
54435    }
54436
54437    #[simd_test(enable = "avx512f")]
54438    unsafe fn test_mm512_movehdup_ps() {
54439        let a = _mm512_setr_ps(
54440            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54441        );
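        // movehdup duplicates each odd-indexed element into the even position below it.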
54442        let r = _mm512_movehdup_ps(a);
54443        let e = _mm512_setr_ps(
54444            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54445        );
54446        assert_eq_m512(r, e);
54447    }
54448
54449    #[simd_test(enable = "avx512f")]
54450    unsafe fn test_mm512_mask_movehdup_ps() {
54451        let a = _mm512_setr_ps(
54452            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54453        );
54454        let r = _mm512_mask_movehdup_ps(a, 0, a);
54455        assert_eq_m512(r, a);
54456        let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
54457        let e = _mm512_setr_ps(
54458            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54459        );
54460        assert_eq_m512(r, e);
54461    }
54462
54463    #[simd_test(enable = "avx512f")]
54464    unsafe fn test_mm512_maskz_movehdup_ps() {
54465        let a = _mm512_setr_ps(
54466            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54467        );
54468        let r = _mm512_maskz_movehdup_ps(0, a);
54469        assert_eq_m512(r, _mm512_setzero_ps());
54470        let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
54471        let e = _mm512_setr_ps(
54472            2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54473        );
54474        assert_eq_m512(r, e);
54475    }
54476
54477    #[simd_test(enable = "avx512f,avx512vl")]
54478    unsafe fn test_mm256_mask_movehdup_ps() {
54479        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54480        let r = _mm256_mask_movehdup_ps(a, 0, a);
54481        assert_eq_m256(r, a);
54482        let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
54483        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54484        assert_eq_m256(r, e);
54485    }
54486
54487    #[simd_test(enable = "avx512f,avx512vl")]
54488    unsafe fn test_mm256_maskz_movehdup_ps() {
54489        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54490        let r = _mm256_maskz_movehdup_ps(0, a);
54491        assert_eq_m256(r, _mm256_setzero_ps());
54492        let r = _mm256_maskz_movehdup_ps(0b11111111, a);
54493        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54494        assert_eq_m256(r, e);
54495    }
54496
54497    #[simd_test(enable = "avx512f,avx512vl")]
54498    unsafe fn test_mm_mask_movehdup_ps() {
54499        let a = _mm_set_ps(1., 2., 3., 4.);
54500        let r = _mm_mask_movehdup_ps(a, 0, a);
54501        assert_eq_m128(r, a);
54502        let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
54503        let e = _mm_set_ps(1., 1., 3., 3.);
54504        assert_eq_m128(r, e);
54505    }
54506
54507    #[simd_test(enable = "avx512f,avx512vl")]
54508    unsafe fn test_mm_maskz_movehdup_ps() {
54509        let a = _mm_set_ps(1., 2., 3., 4.);
54510        let r = _mm_maskz_movehdup_ps(0, a);
54511        assert_eq_m128(r, _mm_setzero_ps());
54512        let r = _mm_maskz_movehdup_ps(0b00001111, a);
54513        let e = _mm_set_ps(1., 1., 3., 3.);
54514        assert_eq_m128(r, e);
54515    }
54516
54517    #[simd_test(enable = "avx512f")]
54518    unsafe fn test_mm512_inserti32x4() {
54519        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54520        let b = _mm_setr_epi32(17, 18, 19, 20);
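        // Index 0 replaces the lowest 128-bit lane of `a` with `b`.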
54521        let r = _mm512_inserti32x4::<0>(a, b);
54522        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54523        assert_eq_m512i(r, e);
54524    }
54525
54526    #[simd_test(enable = "avx512f")]
54527    unsafe fn test_mm512_mask_inserti32x4() {
54528        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54529        let b = _mm_setr_epi32(17, 18, 19, 20);
54530        let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
54531        assert_eq_m512i(r, a);
54532        let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
54533        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54534        assert_eq_m512i(r, e);
54535    }
54536
54537    #[simd_test(enable = "avx512f")]
54538    unsafe fn test_mm512_maskz_inserti32x4() {
54539        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54540        let b = _mm_setr_epi32(17, 18, 19, 20);
54541        let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
54542        assert_eq_m512i(r, _mm512_setzero_si512());
54543        let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
54544        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54545        assert_eq_m512i(r, e);
54546    }
54547
54548    #[simd_test(enable = "avx512f,avx512vl")]
54549    unsafe fn test_mm256_inserti32x4() {
54550        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54551        let b = _mm_set_epi32(17, 18, 19, 20);
54552        let r = _mm256_inserti32x4::<1>(a, b);
54553        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54554        assert_eq_m256i(r, e);
54555    }
54556
54557    #[simd_test(enable = "avx512f,avx512vl")]
54558    unsafe fn test_mm256_mask_inserti32x4() {
54559        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54560        let b = _mm_set_epi32(17, 18, 19, 20);
54561        let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
54562        assert_eq_m256i(r, a);
54563        let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
54564        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54565        assert_eq_m256i(r, e);
54566    }
54567
54568    #[simd_test(enable = "avx512f,avx512vl")]
54569    unsafe fn test_mm256_maskz_inserti32x4() {
54570        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54571        let b = _mm_set_epi32(17, 18, 19, 20);
54572        let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
54573        assert_eq_m256i(r, _mm256_setzero_si256());
54574        let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
54575        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54576        assert_eq_m256i(r, e);
54577    }
54578
54579    #[simd_test(enable = "avx512f")]
54580    unsafe fn test_mm512_insertf32x4() {
54581        let a = _mm512_setr_ps(
54582            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54583        );
54584        let b = _mm_setr_ps(17., 18., 19., 20.);
54585        let r = _mm512_insertf32x4::<0>(a, b);
54586        let e = _mm512_setr_ps(
54587            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54588        );
54589        assert_eq_m512(r, e);
54590    }
54591
54592    #[simd_test(enable = "avx512f")]
54593    unsafe fn test_mm512_mask_insertf32x4() {
54594        let a = _mm512_setr_ps(
54595            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54596        );
54597        let b = _mm_setr_ps(17., 18., 19., 20.);
54598        let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
54599        assert_eq_m512(r, a);
54600        let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
54601        let e = _mm512_setr_ps(
54602            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54603        );
54604        assert_eq_m512(r, e);
54605    }
54606
54607    #[simd_test(enable = "avx512f")]
54608    unsafe fn test_mm512_maskz_insertf32x4() {
54609        let a = _mm512_setr_ps(
54610            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54611        );
54612        let b = _mm_setr_ps(17., 18., 19., 20.);
54613        let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
54614        assert_eq_m512(r, _mm512_setzero_ps());
54615        let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
54616        let e = _mm512_setr_ps(
54617            17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54618        );
54619        assert_eq_m512(r, e);
54620    }
54621
54622    #[simd_test(enable = "avx512f,avx512vl")]
54623    unsafe fn test_mm256_insertf32x4() {
54624        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54625        let b = _mm_set_ps(17., 18., 19., 20.);
54626        let r = _mm256_insertf32x4::<1>(a, b);
54627        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54628        assert_eq_m256(r, e);
54629    }
54630
54631    #[simd_test(enable = "avx512f,avx512vl")]
54632    unsafe fn test_mm256_mask_insertf32x4() {
54633        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54634        let b = _mm_set_ps(17., 18., 19., 20.);
54635        let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
54636        assert_eq_m256(r, a);
54637        let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
54638        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54639        assert_eq_m256(r, e);
54640    }
54641
54642    #[simd_test(enable = "avx512f,avx512vl")]
54643    unsafe fn test_mm256_maskz_insertf32x4() {
54644        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54645        let b = _mm_set_ps(17., 18., 19., 20.);
54646        let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
54647        assert_eq_m256(r, _mm256_setzero_ps());
54648        let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
54649        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54650        assert_eq_m256(r, e);
54651    }
54652
54653    #[simd_test(enable = "avx512f")]
54654    unsafe fn test_mm512_castps128_ps512() {
54655        let a = _mm_setr_ps(17., 18., 19., 20.);
54656        let r = _mm512_castps128_ps512(a);
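        // The upper 384 bits of the cast result are undefined, so only the low 128 bits are checked.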
54657        assert_eq_m128(_mm512_castps512_ps128(r), a);
54658    }
54659
54660    #[simd_test(enable = "avx512f")]
54661    unsafe fn test_mm512_castps256_ps512() {
54662        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54663        let r = _mm512_castps256_ps512(a);
54664        assert_eq_m256(_mm512_castps512_ps256(r), a);
54665    }
54666
54667    #[simd_test(enable = "avx512f")]
54668    unsafe fn test_mm512_zextps128_ps512() {
54669        let a = _mm_setr_ps(17., 18., 19., 20.);
54670        let r = _mm512_zextps128_ps512(a);
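        // Unlike `_mm512_castps128_ps512`, the zero-extending cast guarantees the upper 384 bits are zero.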
54671        let e = _mm512_setr_ps(
54672            17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
54673        );
54674        assert_eq_m512(r, e);
54675    }
54676
54677    #[simd_test(enable = "avx512f")]
54678    unsafe fn test_mm512_zextps256_ps512() {
54679        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54680        let r = _mm512_zextps256_ps512(a);
54681        let e = _mm512_setr_ps(
54682            17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
54683        );
54684        assert_eq_m512(r, e);
54685    }
54686
54687    #[simd_test(enable = "avx512f")]
54688    unsafe fn test_mm512_castps512_ps128() {
54689        let a = _mm512_setr_ps(
54690            17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
54691        );
54692        let r = _mm512_castps512_ps128(a);
54693        let e = _mm_setr_ps(17., 18., 19., 20.);
54694        assert_eq_m128(r, e);
54695    }
54696
54697    #[simd_test(enable = "avx512f")]
54698    unsafe fn test_mm512_castps512_ps256() {
54699        let a = _mm512_setr_ps(
54700            17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
54701        );
54702        let r = _mm512_castps512_ps256(a);
54703        let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54704        assert_eq_m256(r, e);
54705    }
54706
54707    #[simd_test(enable = "avx512f")]
54708    unsafe fn test_mm512_castps_pd() {
54709        let a = _mm512_set1_ps(1.);
54710        let r = _mm512_castps_pd(a);
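        // 0.007812501848093234 is the f64 whose bit pattern is two adjacent 1.0f32 values (0x3F80_0000_3F80_0000).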
54711        let e = _mm512_set1_pd(0.007812501848093234);
54712        assert_eq_m512d(r, e);
54713    }
54714
54715    #[simd_test(enable = "avx512f")]
54716    unsafe fn test_mm512_castps_si512() {
54717        let a = _mm512_set1_ps(1.);
54718        let r = _mm512_castps_si512(a);
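        // 1065353216 == 0x3F80_0000, the IEEE-754 bit pattern of 1.0f32.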
54719        let e = _mm512_set1_epi32(1065353216);
54720        assert_eq_m512i(r, e);
54721    }
54722
54723    #[simd_test(enable = "avx512f")]
54724    unsafe fn test_mm512_broadcastd_epi32() {
54725        let a = _mm_set_epi32(17, 18, 19, 20);
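        // Element 0 of `a` (the last argument to `_mm_set_epi32`, i.e. 20) is broadcast to every lane.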
54726        let r = _mm512_broadcastd_epi32(a);
54727        let e = _mm512_set1_epi32(20);
54728        assert_eq_m512i(r, e);
54729    }
54730
54731    #[simd_test(enable = "avx512f")]
54732    unsafe fn test_mm512_mask_broadcastd_epi32() {
54733        let src = _mm512_set1_epi32(20);
54734        let a = _mm_set_epi32(17, 18, 19, 20);
54735        let r = _mm512_mask_broadcastd_epi32(src, 0, a);
54736        assert_eq_m512i(r, src);
54737        let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
54738        let e = _mm512_set1_epi32(20);
54739        assert_eq_m512i(r, e);
54740    }
54741
54742    #[simd_test(enable = "avx512f")]
54743    unsafe fn test_mm512_maskz_broadcastd_epi32() {
54744        let a = _mm_set_epi32(17, 18, 19, 20);
54745        let r = _mm512_maskz_broadcastd_epi32(0, a);
54746        assert_eq_m512i(r, _mm512_setzero_si512());
54747        let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
54748        let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
54749        assert_eq_m512i(r, e);
54750    }
54751
54752    #[simd_test(enable = "avx512f,avx512vl")]
54753    unsafe fn test_mm256_mask_broadcastd_epi32() {
54754        let src = _mm256_set1_epi32(20);
54755        let a = _mm_set_epi32(17, 18, 19, 20);
54756        let r = _mm256_mask_broadcastd_epi32(src, 0, a);
54757        assert_eq_m256i(r, src);
54758        let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
54759        let e = _mm256_set1_epi32(20);
54760        assert_eq_m256i(r, e);
54761    }
54762
54763    #[simd_test(enable = "avx512f,avx512vl")]
54764    unsafe fn test_mm256_maskz_broadcastd_epi32() {
54765        let a = _mm_set_epi32(17, 18, 19, 20);
54766        let r = _mm256_maskz_broadcastd_epi32(0, a);
54767        assert_eq_m256i(r, _mm256_setzero_si256());
54768        let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
54769        let e = _mm256_set1_epi32(20);
54770        assert_eq_m256i(r, e);
54771    }
54772
54773    #[simd_test(enable = "avx512f,avx512vl")]
54774    unsafe fn test_mm_mask_broadcastd_epi32() {
54775        let src = _mm_set1_epi32(20);
54776        let a = _mm_set_epi32(17, 18, 19, 20);
54777        let r = _mm_mask_broadcastd_epi32(src, 0, a);
54778        assert_eq_m128i(r, src);
54779        let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
54780        let e = _mm_set1_epi32(20);
54781        assert_eq_m128i(r, e);
54782    }
54783
54784    #[simd_test(enable = "avx512f,avx512vl")]
54785    unsafe fn test_mm_maskz_broadcastd_epi32() {
54786        let a = _mm_set_epi32(17, 18, 19, 20);
54787        let r = _mm_maskz_broadcastd_epi32(0, a);
54788        assert_eq_m128i(r, _mm_setzero_si128());
54789        let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
54790        let e = _mm_set1_epi32(20);
54791        assert_eq_m128i(r, e);
54792    }
54793
54794    #[simd_test(enable = "avx512f")]
54795    unsafe fn test_mm512_broadcastss_ps() {
54796        let a = _mm_set_ps(17., 18., 19., 20.);
54797        let r = _mm512_broadcastss_ps(a);
54798        let e = _mm512_set1_ps(20.);
54799        assert_eq_m512(r, e);
54800    }
54801
54802    #[simd_test(enable = "avx512f")]
54803    unsafe fn test_mm512_mask_broadcastss_ps() {
54804        let src = _mm512_set1_ps(20.);
54805        let a = _mm_set_ps(17., 18., 19., 20.);
54806        let r = _mm512_mask_broadcastss_ps(src, 0, a);
54807        assert_eq_m512(r, src);
54808        let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
54809        let e = _mm512_set1_ps(20.);
54810        assert_eq_m512(r, e);
54811    }
54812
54813    #[simd_test(enable = "avx512f")]
54814    unsafe fn test_mm512_maskz_broadcastss_ps() {
54815        let a = _mm_set_ps(17., 18., 19., 20.);
54816        let r = _mm512_maskz_broadcastss_ps(0, a);
54817        assert_eq_m512(r, _mm512_setzero_ps());
54818        let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
54819        let e = _mm512_setr_ps(
54820            20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
54821        );
54822        assert_eq_m512(r, e);
54823    }
54824
54825    #[simd_test(enable = "avx512f,avx512vl")]
54826    unsafe fn test_mm256_mask_broadcastss_ps() {
54827        let src = _mm256_set1_ps(20.);
54828        let a = _mm_set_ps(17., 18., 19., 20.);
54829        let r = _mm256_mask_broadcastss_ps(src, 0, a);
54830        assert_eq_m256(r, src);
54831        let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
54832        let e = _mm256_set1_ps(20.);
54833        assert_eq_m256(r, e);
54834    }
54835
54836    #[simd_test(enable = "avx512f,avx512vl")]
54837    unsafe fn test_mm256_maskz_broadcastss_ps() {
54838        let a = _mm_set_ps(17., 18., 19., 20.);
54839        let r = _mm256_maskz_broadcastss_ps(0, a);
54840        assert_eq_m256(r, _mm256_setzero_ps());
54841        let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
54842        let e = _mm256_set1_ps(20.);
54843        assert_eq_m256(r, e);
54844    }
54845
54846    #[simd_test(enable = "avx512f,avx512vl")]
54847    unsafe fn test_mm_mask_broadcastss_ps() {
54848        let src = _mm_set1_ps(20.);
54849        let a = _mm_set_ps(17., 18., 19., 20.);
54850        let r = _mm_mask_broadcastss_ps(src, 0, a);
54851        assert_eq_m128(r, src);
54852        let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
54853        let e = _mm_set1_ps(20.);
54854        assert_eq_m128(r, e);
54855    }
54856
54857    #[simd_test(enable = "avx512f,avx512vl")]
54858    unsafe fn test_mm_maskz_broadcastss_ps() {
54859        let a = _mm_set_ps(17., 18., 19., 20.);
54860        let r = _mm_maskz_broadcastss_ps(0, a);
54861        assert_eq_m128(r, _mm_setzero_ps());
54862        let r = _mm_maskz_broadcastss_ps(0b00001111, a);
54863        let e = _mm_set1_ps(20.);
54864        assert_eq_m128(r, e);
54865    }
54866
54867    #[simd_test(enable = "avx512f")]
54868    unsafe fn test_mm512_broadcast_i32x4() {
54869        let a = _mm_set_epi32(17, 18, 19, 20);
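        // The 128-bit source is repeated into each of the four 128-bit lanes of the result.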
54870        let r = _mm512_broadcast_i32x4(a);
54871        let e = _mm512_set_epi32(
54872            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54873        );
54874        assert_eq_m512i(r, e);
54875    }
54876
54877    #[simd_test(enable = "avx512f")]
54878    unsafe fn test_mm512_mask_broadcast_i32x4() {
54879        let src = _mm512_set1_epi32(20);
54880        let a = _mm_set_epi32(17, 18, 19, 20);
54881        let r = _mm512_mask_broadcast_i32x4(src, 0, a);
54882        assert_eq_m512i(r, src);
54883        let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
54884        let e = _mm512_set_epi32(
54885            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54886        );
54887        assert_eq_m512i(r, e);
54888    }
54889
54890    #[simd_test(enable = "avx512f")]
54891    unsafe fn test_mm512_maskz_broadcast_i32x4() {
54892        let a = _mm_set_epi32(17, 18, 19, 20);
54893        let r = _mm512_maskz_broadcast_i32x4(0, a);
54894        assert_eq_m512i(r, _mm512_setzero_si512());
54895        let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
54896        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
54897        assert_eq_m512i(r, e);
54898    }
54899
54900    #[simd_test(enable = "avx512f,avx512vl")]
54901    unsafe fn test_mm256_broadcast_i32x4() {
54902        let a = _mm_set_epi32(17, 18, 19, 20);
54903        let r = _mm256_broadcast_i32x4(a);
54904        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54905        assert_eq_m256i(r, e);
54906    }
54907
54908    #[simd_test(enable = "avx512f,avx512vl")]
54909    unsafe fn test_mm256_mask_broadcast_i32x4() {
54910        let src = _mm256_set1_epi32(20);
54911        let a = _mm_set_epi32(17, 18, 19, 20);
54912        let r = _mm256_mask_broadcast_i32x4(src, 0, a);
54913        assert_eq_m256i(r, src);
54914        let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
54915        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54916        assert_eq_m256i(r, e);
54917    }
54918
54919    #[simd_test(enable = "avx512f,avx512vl")]
54920    unsafe fn test_mm256_maskz_broadcast_i32x4() {
54921        let a = _mm_set_epi32(17, 18, 19, 20);
54922        let r = _mm256_maskz_broadcast_i32x4(0, a);
54923        assert_eq_m256i(r, _mm256_setzero_si256());
54924        let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
54925        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54926        assert_eq_m256i(r, e);
54927    }
54928
54929    #[simd_test(enable = "avx512f")]
54930    unsafe fn test_mm512_broadcast_f32x4() {
54931        let a = _mm_set_ps(17., 18., 19., 20.);
54932        let r = _mm512_broadcast_f32x4(a);
54933        let e = _mm512_set_ps(
54934            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54935        );
54936        assert_eq_m512(r, e);
54937    }
54938
54939    #[simd_test(enable = "avx512f")]
54940    unsafe fn test_mm512_mask_broadcast_f32x4() {
54941        let src = _mm512_set1_ps(20.);
54942        let a = _mm_set_ps(17., 18., 19., 20.);
54943        let r = _mm512_mask_broadcast_f32x4(src, 0, a);
54944        assert_eq_m512(r, src);
54945        let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
54946        let e = _mm512_set_ps(
54947            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54948        );
54949        assert_eq_m512(r, e);
54950    }
54951
54952    #[simd_test(enable = "avx512f")]
54953    unsafe fn test_mm512_maskz_broadcast_f32x4() {
54954        let a = _mm_set_ps(17., 18., 19., 20.);
54955        let r = _mm512_maskz_broadcast_f32x4(0, a);
54956        assert_eq_m512(r, _mm512_setzero_ps());
54957        let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
54958        let e = _mm512_set_ps(
54959            0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
54960        );
54961        assert_eq_m512(r, e);
54962    }
54963
54964    #[simd_test(enable = "avx512f,avx512vl")]
54965    unsafe fn test_mm256_broadcast_f32x4() {
54966        let a = _mm_set_ps(17., 18., 19., 20.);
54967        let r = _mm256_broadcast_f32x4(a);
54968        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54969        assert_eq_m256(r, e);
54970    }
54971
54972    #[simd_test(enable = "avx512f,avx512vl")]
54973    unsafe fn test_mm256_mask_broadcast_f32x4() {
54974        let src = _mm256_set1_ps(20.);
54975        let a = _mm_set_ps(17., 18., 19., 20.);
54976        let r = _mm256_mask_broadcast_f32x4(src, 0, a);
54977        assert_eq_m256(r, src);
54978        let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
54979        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54980        assert_eq_m256(r, e);
54981    }
54982
54983    #[simd_test(enable = "avx512f,avx512vl")]
54984    unsafe fn test_mm256_maskz_broadcast_f32x4() {
54985        let a = _mm_set_ps(17., 18., 19., 20.);
54986        let r = _mm256_maskz_broadcast_f32x4(0, a);
54987        assert_eq_m256(r, _mm256_setzero_ps());
54988        let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
54989        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54990        assert_eq_m256(r, e);
54991    }
54992
54993    #[simd_test(enable = "avx512f")]
54994    unsafe fn test_mm512_mask_blend_epi32() {
54995        let a = _mm512_set1_epi32(1);
54996        let b = _mm512_set1_epi32(2);
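        // A set mask bit selects the element from `b`; a clear bit keeps the element from `a`.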
54997        let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
54998        let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
54999        assert_eq_m512i(r, e);
55000    }
55001
55002    #[simd_test(enable = "avx512f,avx512vl")]
55003    unsafe fn test_mm256_mask_blend_epi32() {
55004        let a = _mm256_set1_epi32(1);
55005        let b = _mm256_set1_epi32(2);
55006        let r = _mm256_mask_blend_epi32(0b11111111, a, b);
55007        let e = _mm256_set1_epi32(2);
55008        assert_eq_m256i(r, e);
55009    }
55010
55011    #[simd_test(enable = "avx512f,avx512vl")]
55012    unsafe fn test_mm_mask_blend_epi32() {
55013        let a = _mm_set1_epi32(1);
55014        let b = _mm_set1_epi32(2);
55015        let r = _mm_mask_blend_epi32(0b00001111, a, b);
55016        let e = _mm_set1_epi32(2);
55017        assert_eq_m128i(r, e);
55018    }
55019
55020    #[simd_test(enable = "avx512f")]
55021    unsafe fn test_mm512_mask_blend_ps() {
55022        let a = _mm512_set1_ps(1.);
55023        let b = _mm512_set1_ps(2.);
55024        let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
55025        let e = _mm512_set_ps(
55026            2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
55027        );
55028        assert_eq_m512(r, e);
55029    }
55030
55031    #[simd_test(enable = "avx512f,avx512vl")]
55032    unsafe fn test_mm256_mask_blend_ps() {
55033        let a = _mm256_set1_ps(1.);
55034        let b = _mm256_set1_ps(2.);
55035        let r = _mm256_mask_blend_ps(0b11111111, a, b);
55036        let e = _mm256_set1_ps(2.);
55037        assert_eq_m256(r, e);
55038    }
55039
55040    #[simd_test(enable = "avx512f,avx512vl")]
55041    unsafe fn test_mm_mask_blend_ps() {
55042        let a = _mm_set1_ps(1.);
55043        let b = _mm_set1_ps(2.);
55044        let r = _mm_mask_blend_ps(0b00001111, a, b);
55045        let e = _mm_set1_ps(2.);
55046        assert_eq_m128(r, e);
55047    }
55048
55049    #[simd_test(enable = "avx512f")]
55050    unsafe fn test_mm512_unpackhi_epi32() {
55051        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55052        let b = _mm512_set_epi32(
55053            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55054        );
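        // Interleaves the upper two 32-bit elements of each 128-bit lane of `a` and `b`.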
55055        let r = _mm512_unpackhi_epi32(a, b);
55056        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
55057        assert_eq_m512i(r, e);
55058    }
55059
55060    #[simd_test(enable = "avx512f")]
55061    unsafe fn test_mm512_mask_unpackhi_epi32() {
55062        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55063        let b = _mm512_set_epi32(
55064            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55065        );
55066        let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
55067        assert_eq_m512i(r, a);
55068        let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
55069        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
55070        assert_eq_m512i(r, e);
55071    }
55072
55073    #[simd_test(enable = "avx512f")]
55074    unsafe fn test_mm512_maskz_unpackhi_epi32() {
55075        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55076        let b = _mm512_set_epi32(
55077            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55078        );
55079        let r = _mm512_maskz_unpackhi_epi32(0, a, b);
55080        assert_eq_m512i(r, _mm512_setzero_si512());
55081        let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
55082        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
55083        assert_eq_m512i(r, e);
55084    }
55085
55086    #[simd_test(enable = "avx512f,avx512vl")]
55087    unsafe fn test_mm256_mask_unpackhi_epi32() {
55088        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55089        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55090        let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
55091        assert_eq_m256i(r, a);
55092        let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
55093        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55094        assert_eq_m256i(r, e);
55095    }
55096
55097    #[simd_test(enable = "avx512f,avx512vl")]
55098    unsafe fn test_mm256_maskz_unpackhi_epi32() {
55099        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55100        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55101        let r = _mm256_maskz_unpackhi_epi32(0, a, b);
55102        assert_eq_m256i(r, _mm256_setzero_si256());
55103        let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
55104        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55105        assert_eq_m256i(r, e);
55106    }
55107
55108    #[simd_test(enable = "avx512f,avx512vl")]
55109    unsafe fn test_mm_mask_unpackhi_epi32() {
55110        let a = _mm_set_epi32(1, 2, 3, 4);
55111        let b = _mm_set_epi32(17, 18, 19, 20);
55112        let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
55113        assert_eq_m128i(r, a);
55114        let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
55115        let e = _mm_set_epi32(17, 1, 18, 2);
55116        assert_eq_m128i(r, e);
55117    }
55118
55119    #[simd_test(enable = "avx512f,avx512vl")]
55120    unsafe fn test_mm_maskz_unpackhi_epi32() {
55121        let a = _mm_set_epi32(1, 2, 3, 4);
55122        let b = _mm_set_epi32(17, 18, 19, 20);
55123        let r = _mm_maskz_unpackhi_epi32(0, a, b);
55124        assert_eq_m128i(r, _mm_setzero_si128());
55125        let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
55126        let e = _mm_set_epi32(17, 1, 18, 2);
55127        assert_eq_m128i(r, e);
55128    }
55129
55130    #[simd_test(enable = "avx512f")]
55131    unsafe fn test_mm512_unpackhi_ps() {
55132        let a = _mm512_set_ps(
55133            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55134        );
55135        let b = _mm512_set_ps(
55136            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55137        );
55138        let r = _mm512_unpackhi_ps(a, b);
55139        let e = _mm512_set_ps(
55140            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55141        );
55142        assert_eq_m512(r, e);
55143    }
55144
55145    #[simd_test(enable = "avx512f")]
55146    unsafe fn test_mm512_mask_unpackhi_ps() {
55147        let a = _mm512_set_ps(
55148            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55149        );
55150        let b = _mm512_set_ps(
55151            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55152        );
55153        let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
55154        assert_eq_m512(r, a);
55155        let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
55156        let e = _mm512_set_ps(
55157            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55158        );
55159        assert_eq_m512(r, e);
55160    }
55161
55162    #[simd_test(enable = "avx512f")]
55163    unsafe fn test_mm512_maskz_unpackhi_ps() {
55164        let a = _mm512_set_ps(
55165            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55166        );
55167        let b = _mm512_set_ps(
55168            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55169        );
55170        let r = _mm512_maskz_unpackhi_ps(0, a, b);
55171        assert_eq_m512(r, _mm512_setzero_ps());
55172        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
55173        let e = _mm512_set_ps(
55174            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
55175        );
55176        assert_eq_m512(r, e);
55177    }
55178
55179    #[simd_test(enable = "avx512f,avx512vl")]
55180    unsafe fn test_mm256_mask_unpackhi_ps() {
55181        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55182        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55183        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
55184        assert_eq_m256(r, a);
55185        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
55186        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55187        assert_eq_m256(r, e);
55188    }
55189
55190    #[simd_test(enable = "avx512f,avx512vl")]
55191    unsafe fn test_mm256_maskz_unpackhi_ps() {
55192        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55193        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55194        let r = _mm256_maskz_unpackhi_ps(0, a, b);
55195        assert_eq_m256(r, _mm256_setzero_ps());
55196        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
55197        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55198        assert_eq_m256(r, e);
55199    }
55200
55201    #[simd_test(enable = "avx512f,avx512vl")]
55202    unsafe fn test_mm_mask_unpackhi_ps() {
55203        let a = _mm_set_ps(1., 2., 3., 4.);
55204        let b = _mm_set_ps(17., 18., 19., 20.);
55205        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
55206        assert_eq_m128(r, a);
55207        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
55208        let e = _mm_set_ps(17., 1., 18., 2.);
55209        assert_eq_m128(r, e);
55210    }
55211
55212    #[simd_test(enable = "avx512f,avx512vl")]
55213    unsafe fn test_mm_maskz_unpackhi_ps() {
55214        let a = _mm_set_ps(1., 2., 3., 4.);
55215        let b = _mm_set_ps(17., 18., 19., 20.);
55216        let r = _mm_maskz_unpackhi_ps(0, a, b);
55217        assert_eq_m128(r, _mm_setzero_ps());
55218        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
55219        let e = _mm_set_ps(17., 1., 18., 2.);
55220        assert_eq_m128(r, e);
55221    }
55222
55223    #[simd_test(enable = "avx512f")]
55224    unsafe fn test_mm512_unpacklo_epi32() {
55225        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55226        let b = _mm512_set_epi32(
55227            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55228        );
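        // Interleaves the lower two 32-bit elements of each 128-bit lane of `a` and `b`.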
55229        let r = _mm512_unpacklo_epi32(a, b);
55230        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55231        assert_eq_m512i(r, e);
55232    }
55233
55234    #[simd_test(enable = "avx512f")]
55235    unsafe fn test_mm512_mask_unpacklo_epi32() {
55236        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55237        let b = _mm512_set_epi32(
55238            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55239        );
55240        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
55241        assert_eq_m512i(r, a);
55242        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
55243        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55244        assert_eq_m512i(r, e);
55245    }
55246
55247    #[simd_test(enable = "avx512f")]
55248    unsafe fn test_mm512_maskz_unpacklo_epi32() {
55249        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55250        let b = _mm512_set_epi32(
55251            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55252        );
55253        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
55254        assert_eq_m512i(r, _mm512_setzero_si512());
55255        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
55256        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
55257        assert_eq_m512i(r, e);
55258    }
55259
55260    #[simd_test(enable = "avx512f,avx512vl")]
55261    unsafe fn test_mm256_mask_unpacklo_epi32() {
55262        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55263        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55264        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
55265        assert_eq_m256i(r, a);
55266        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
55267        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55268        assert_eq_m256i(r, e);
55269    }
55270
55271    #[simd_test(enable = "avx512f,avx512vl")]
55272    unsafe fn test_mm256_maskz_unpacklo_epi32() {
55273        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55274        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55275        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
55276        assert_eq_m256i(r, _mm256_setzero_si256());
55277        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
55278        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55279        assert_eq_m256i(r, e);
55280    }
55281
55282    #[simd_test(enable = "avx512f,avx512vl")]
55283    unsafe fn test_mm_mask_unpacklo_epi32() {
55284        let a = _mm_set_epi32(1, 2, 3, 4);
55285        let b = _mm_set_epi32(17, 18, 19, 20);
55286        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
55287        assert_eq_m128i(r, a);
55288        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
55289        let e = _mm_set_epi32(19, 3, 20, 4);
55290        assert_eq_m128i(r, e);
55291    }
55292
55293    #[simd_test(enable = "avx512f,avx512vl")]
55294    unsafe fn test_mm_maskz_unpacklo_epi32() {
55295        let a = _mm_set_epi32(1, 2, 3, 4);
55296        let b = _mm_set_epi32(17, 18, 19, 20);
55297        let r = _mm_maskz_unpacklo_epi32(0, a, b);
55298        assert_eq_m128i(r, _mm_setzero_si128());
55299        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
55300        let e = _mm_set_epi32(19, 3, 20, 4);
55301        assert_eq_m128i(r, e);
55302    }
55303
55304    #[simd_test(enable = "avx512f")]
55305    unsafe fn test_mm512_unpacklo_ps() {
55306        let a = _mm512_set_ps(
55307            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55308        );
55309        let b = _mm512_set_ps(
55310            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55311        );
55312        let r = _mm512_unpacklo_ps(a, b);
55313        let e = _mm512_set_ps(
55314            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55315        );
55316        assert_eq_m512(r, e);
55317    }
55318
55319    #[simd_test(enable = "avx512f")]
55320    unsafe fn test_mm512_mask_unpacklo_ps() {
55321        let a = _mm512_set_ps(
55322            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55323        );
55324        let b = _mm512_set_ps(
55325            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55326        );
55327        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
55328        assert_eq_m512(r, a);
55329        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
55330        let e = _mm512_set_ps(
55331            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55332        );
55333        assert_eq_m512(r, e);
55334    }
55335
55336    #[simd_test(enable = "avx512f")]
55337    unsafe fn test_mm512_maskz_unpacklo_ps() {
55338        let a = _mm512_set_ps(
55339            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55340        );
55341        let b = _mm512_set_ps(
55342            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55343        );
55344        let r = _mm512_maskz_unpacklo_ps(0, a, b);
55345        assert_eq_m512(r, _mm512_setzero_ps());
55346        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
55347        let e = _mm512_set_ps(
55348            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
55349        );
55350        assert_eq_m512(r, e);
55351    }
55352
55353    #[simd_test(enable = "avx512f,avx512vl")]
55354    unsafe fn test_mm256_mask_unpacklo_ps() {
55355        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55356        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55357        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
55358        assert_eq_m256(r, a);
55359        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
55360        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55361        assert_eq_m256(r, e);
55362    }
55363
55364    #[simd_test(enable = "avx512f,avx512vl")]
55365    unsafe fn test_mm256_maskz_unpacklo_ps() {
55366        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55367        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55368        let r = _mm256_maskz_unpacklo_ps(0, a, b);
55369        assert_eq_m256(r, _mm256_setzero_ps());
55370        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
55371        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55372        assert_eq_m256(r, e);
55373    }
55374
55375    #[simd_test(enable = "avx512f,avx512vl")]
55376    unsafe fn test_mm_mask_unpacklo_ps() {
55377        let a = _mm_set_ps(1., 2., 3., 4.);
55378        let b = _mm_set_ps(17., 18., 19., 20.);
55379        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
55380        assert_eq_m128(r, a);
55381        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
55382        let e = _mm_set_ps(19., 3., 20., 4.);
55383        assert_eq_m128(r, e);
55384    }
55385
55386    #[simd_test(enable = "avx512f,avx512vl")]
55387    unsafe fn test_mm_maskz_unpacklo_ps() {
55388        let a = _mm_set_ps(1., 2., 3., 4.);
55389        let b = _mm_set_ps(17., 18., 19., 20.);
55390        let r = _mm_maskz_unpacklo_ps(0, a, b);
55391        assert_eq_m128(r, _mm_setzero_ps());
55392        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
55393        let e = _mm_set_ps(19., 3., 20., 4.);
55394        assert_eq_m128(r, e);
55395    }
55396
55397    #[simd_test(enable = "avx512f")]
55398    unsafe fn test_mm512_alignr_epi32() {
55399        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55400        let b = _mm512_set_epi32(
55401            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55402        );
55403        let r = _mm512_alignr_epi32::<0>(a, b);
55404        assert_eq_m512i(r, b);
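        // The shift count is interpreted modulo the 16 lanes, so a count of 16 gives the same result as 0.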
55405        let r = _mm512_alignr_epi32::<16>(a, b);
55406        assert_eq_m512i(r, b);
55407        let r = _mm512_alignr_epi32::<1>(a, b);
55408        let e = _mm512_set_epi32(
55409            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55410        );
55411        assert_eq_m512i(r, e);
55412    }
55413
55414    #[simd_test(enable = "avx512f")]
55415    unsafe fn test_mm512_mask_alignr_epi32() {
55416        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55417        let b = _mm512_set_epi32(
55418            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55419        );
55420        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
55421        assert_eq_m512i(r, a);
55422        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
55423        let e = _mm512_set_epi32(
55424            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55425        );
55426        assert_eq_m512i(r, e);
55427    }
55428
55429    #[simd_test(enable = "avx512f")]
55430    unsafe fn test_mm512_maskz_alignr_epi32() {
55431        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55432        let b = _mm512_set_epi32(
55433            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55434        );
55435        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
55436        assert_eq_m512i(r, _mm512_setzero_si512());
55437        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
55438        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
55439        assert_eq_m512i(r, e);
55440    }
55441
55442    #[simd_test(enable = "avx512f,avx512vl")]
55443    unsafe fn test_mm256_alignr_epi32() {
55444        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55445        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55446        let r = _mm256_alignr_epi32::<0>(a, b);
55447        assert_eq_m256i(r, b);
55448        let r = _mm256_alignr_epi32::<1>(a, b);
55449        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55450        assert_eq_m256i(r, e);
55451    }
55452
55453    #[simd_test(enable = "avx512f,avx512vl")]
55454    unsafe fn test_mm256_mask_alignr_epi32() {
55455        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55456        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55457        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
55458        assert_eq_m256i(r, a);
55459        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
55460        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55461        assert_eq_m256i(r, e);
55462    }
55463
55464    #[simd_test(enable = "avx512f,avx512vl")]
55465    unsafe fn test_mm256_maskz_alignr_epi32() {
55466        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55467        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55468        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
55469        assert_eq_m256i(r, _mm256_setzero_si256());
55470        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
55471        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55472        assert_eq_m256i(r, e);
55473    }
55474
55475    #[simd_test(enable = "avx512f,avx512vl")]
55476    unsafe fn test_mm_alignr_epi32() {
55477        let a = _mm_set_epi32(4, 3, 2, 1);
55478        let b = _mm_set_epi32(8, 7, 6, 5);
55479        let r = _mm_alignr_epi32::<0>(a, b);
55480        assert_eq_m128i(r, b);
55481        let r = _mm_alignr_epi32::<1>(a, b);
55482        let e = _mm_set_epi32(1, 8, 7, 6);
55483        assert_eq_m128i(r, e);
55484    }
55485
55486    #[simd_test(enable = "avx512f,avx512vl")]
55487    unsafe fn test_mm_mask_alignr_epi32() {
55488        let a = _mm_set_epi32(4, 3, 2, 1);
55489        let b = _mm_set_epi32(8, 7, 6, 5);
55490        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
55491        assert_eq_m128i(r, a);
55492        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
55493        let e = _mm_set_epi32(1, 8, 7, 6);
55494        assert_eq_m128i(r, e);
55495    }
55496
55497    #[simd_test(enable = "avx512f,avx512vl")]
55498    unsafe fn test_mm_maskz_alignr_epi32() {
55499        let a = _mm_set_epi32(4, 3, 2, 1);
55500        let b = _mm_set_epi32(8, 7, 6, 5);
55501        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
55502        assert_eq_m128i(r, _mm_setzero_si128());
55503        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
55504        let e = _mm_set_epi32(1, 8, 7, 6);
55505        assert_eq_m128i(r, e);
55506    }
55507
55508    #[simd_test(enable = "avx512f")]
55509    unsafe fn test_mm512_and_epi32() {
55510        #[rustfmt::skip]
55511        let a = _mm512_set_epi32(
55512            1 << 1 | 1 << 2, 0, 0, 0,
55513            0, 0, 0, 0,
55514            0, 0, 0, 0,
55515            0, 0, 0, 1 << 1 | 1 << 3,
55516        );
55517        #[rustfmt::skip]
55518        let b = _mm512_set_epi32(
55519            1 << 1, 0, 0, 0,
55520            0, 0, 0, 0,
55521            0, 0, 0, 0,
55522            0, 0, 0, 1 << 3 | 1 << 4,
55523        );
55524        let r = _mm512_and_epi32(a, b);
55525        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55526        assert_eq_m512i(r, e);
55527    }
55528
55529    #[simd_test(enable = "avx512f")]
55530    unsafe fn test_mm512_mask_and_epi32() {
55531        #[rustfmt::skip]
55532        let a = _mm512_set_epi32(
55533            1 << 1 | 1 << 2, 0, 0, 0,
55534            0, 0, 0, 0,
55535            0, 0, 0, 0,
55536            0, 0, 0, 1 << 1 | 1 << 3,
55537        );
55538        #[rustfmt::skip]
55539        let b = _mm512_set_epi32(
55540            1 << 1, 0, 0, 0,
55541            0, 0, 0, 0,
55542            0, 0, 0, 0,
55543            0, 0, 0, 1 << 3 | 1 << 4,
55544        );
55545        let r = _mm512_mask_and_epi32(a, 0, a, b);
55546        assert_eq_m512i(r, a);
55547        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
55548        #[rustfmt::skip]
55549        let e = _mm512_set_epi32(
55550            1 << 1 | 1 << 2, 0, 0, 0,
55551            0, 0, 0, 0,
55552            0, 0, 0, 0,
55553            0, 0, 0, 1 << 3,
55554        );
55555        assert_eq_m512i(r, e);
55556    }
55557
55558    #[simd_test(enable = "avx512f")]
55559    unsafe fn test_mm512_maskz_and_epi32() {
55560        #[rustfmt::skip]
55561        let a = _mm512_set_epi32(
55562            1 << 1 | 1 << 2, 0, 0, 0,
55563            0, 0, 0, 0,
55564            0, 0, 0, 0,
55565            0, 0, 0, 1 << 1 | 1 << 3,
55566        );
55567        #[rustfmt::skip]
55568        let b = _mm512_set_epi32(
55569            1 << 1, 0, 0, 0,
55570            0, 0, 0, 0,
55571            0, 0, 0, 0,
55572            0, 0, 0, 1 << 3 | 1 << 4,
55573        );
55574        let r = _mm512_maskz_and_epi32(0, a, b);
55575        assert_eq_m512i(r, _mm512_setzero_si512());
55576        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
55577        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55578        assert_eq_m512i(r, e);
55579    }
55580
55581    #[simd_test(enable = "avx512f,avx512vl")]
55582    unsafe fn test_mm256_mask_and_epi32() {
55583        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55584        let b = _mm256_set1_epi32(1 << 1);
55585        let r = _mm256_mask_and_epi32(a, 0, a, b);
55586        assert_eq_m256i(r, a);
55587        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
55588        let e = _mm256_set1_epi32(1 << 1);
55589        assert_eq_m256i(r, e);
55590    }
55591
55592    #[simd_test(enable = "avx512f,avx512vl")]
55593    unsafe fn test_mm256_maskz_and_epi32() {
55594        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55595        let b = _mm256_set1_epi32(1 << 1);
55596        let r = _mm256_maskz_and_epi32(0, a, b);
55597        assert_eq_m256i(r, _mm256_setzero_si256());
55598        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
55599        let e = _mm256_set1_epi32(1 << 1);
55600        assert_eq_m256i(r, e);
55601    }
55602
55603    #[simd_test(enable = "avx512f,avx512vl")]
55604    unsafe fn test_mm_mask_and_epi32() {
55605        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55606        let b = _mm_set1_epi32(1 << 1);
55607        let r = _mm_mask_and_epi32(a, 0, a, b);
55608        assert_eq_m128i(r, a);
55609        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
55610        let e = _mm_set1_epi32(1 << 1);
55611        assert_eq_m128i(r, e);
55612    }
55613
55614    #[simd_test(enable = "avx512f,avx512vl")]
55615    unsafe fn test_mm_maskz_and_epi32() {
55616        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55617        let b = _mm_set1_epi32(1 << 1);
55618        let r = _mm_maskz_and_epi32(0, a, b);
55619        assert_eq_m128i(r, _mm_setzero_si128());
55620        let r = _mm_maskz_and_epi32(0b00001111, a, b);
55621        let e = _mm_set1_epi32(1 << 1);
55622        assert_eq_m128i(r, e);
55623    }
55624
55625    #[simd_test(enable = "avx512f")]
55626    unsafe fn test_mm512_and_si512() {
55627        #[rustfmt::skip]
55628        let a = _mm512_set_epi32(
55629            1 << 1 | 1 << 2, 0, 0, 0,
55630            0, 0, 0, 0,
55631            0, 0, 0, 0,
55632            0, 0, 0, 1 << 1 | 1 << 3,
55633        );
55634        #[rustfmt::skip]
55635        let b = _mm512_set_epi32(
55636            1 << 1, 0, 0, 0,
55637            0, 0, 0, 0,
55638            0, 0, 0, 0,
55639            0, 0, 0, 1 << 3 | 1 << 4,
55640        );
55641        let r = _mm512_and_si512(a, b);
55642        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55643        assert_eq_m512i(r, e);
55644    }
55645
55646    #[simd_test(enable = "avx512f")]
55647    unsafe fn test_mm512_or_epi32() {
55648        #[rustfmt::skip]
55649        let a = _mm512_set_epi32(
55650            1 << 1 | 1 << 2, 0, 0, 0,
55651            0, 0, 0, 0,
55652            0, 0, 0, 0,
55653            0, 0, 0, 1 << 1 | 1 << 3,
55654        );
55655        #[rustfmt::skip]
55656        let b = _mm512_set_epi32(
55657            1 << 1, 0, 0, 0,
55658            0, 0, 0, 0,
55659            0, 0, 0, 0,
55660            0, 0, 0, 1 << 3 | 1 << 4,
55661        );
55662        let r = _mm512_or_epi32(a, b);
55663        #[rustfmt::skip]
55664        let e = _mm512_set_epi32(
55665            1 << 1 | 1 << 2, 0, 0, 0,
55666            0, 0, 0, 0,
55667            0, 0, 0, 0,
55668            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55669        );
55670        assert_eq_m512i(r, e);
55671    }
55672
55673    #[simd_test(enable = "avx512f")]
55674    unsafe fn test_mm512_mask_or_epi32() {
55675        #[rustfmt::skip]
55676        let a = _mm512_set_epi32(
55677            1 << 1 | 1 << 2, 0, 0, 0,
55678            0, 0, 0, 0,
55679            0, 0, 0, 0,
55680            0, 0, 0, 1 << 1 | 1 << 3,
55681        );
55682        #[rustfmt::skip]
55683        let b = _mm512_set_epi32(
55684            1 << 1, 0, 0, 0,
55685            0, 0, 0, 0,
55686            0, 0, 0, 0,
55687            0, 0, 0, 1 << 3 | 1 << 4,
55688        );
55689        let r = _mm512_mask_or_epi32(a, 0, a, b);
55690        assert_eq_m512i(r, a);
55691        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
55692        #[rustfmt::skip]
55693        let e = _mm512_set_epi32(
55694            1 << 1 | 1 << 2, 0, 0, 0,
55695            0, 0, 0, 0,
55696            0, 0, 0, 0,
55697            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55698        );
55699        assert_eq_m512i(r, e);
55700    }
55701
55702    #[simd_test(enable = "avx512f")]
55703    unsafe fn test_mm512_maskz_or_epi32() {
55704        #[rustfmt::skip]
55705        let a = _mm512_set_epi32(
55706            1 << 1 | 1 << 2, 0, 0, 0,
55707            0, 0, 0, 0,
55708            0, 0, 0, 0,
55709            0, 0, 0, 1 << 1 | 1 << 3,
55710        );
55711        #[rustfmt::skip]
55712        let b = _mm512_set_epi32(
55713            1 << 1, 0, 0, 0,
55714            0, 0, 0, 0,
55715            0, 0, 0, 0,
55716            0, 0, 0, 1 << 3 | 1 << 4,
55717        );
55718        let r = _mm512_maskz_or_epi32(0, a, b);
55719        assert_eq_m512i(r, _mm512_setzero_si512());
55720        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
55721        #[rustfmt::skip]
55722        let e = _mm512_set_epi32(
55723            0, 0, 0, 0,
55724            0, 0, 0, 0,
55725            0, 0, 0, 0,
55726            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55727        );
55728        assert_eq_m512i(r, e);
55729    }
55730
55731    #[simd_test(enable = "avx512f,avx512vl")]
55732    unsafe fn test_mm256_or_epi32() {
55733        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55734        let b = _mm256_set1_epi32(1 << 1);
55735        let r = _mm256_or_epi32(a, b);
55736        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55737        assert_eq_m256i(r, e);
55738    }
55739
55740    #[simd_test(enable = "avx512f,avx512vl")]
55741    unsafe fn test_mm256_mask_or_epi32() {
55742        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55743        let b = _mm256_set1_epi32(1 << 1);
55744        let r = _mm256_mask_or_epi32(a, 0, a, b);
55745        assert_eq_m256i(r, a);
55746        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
55747        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55748        assert_eq_m256i(r, e);
55749    }
55750
55751    #[simd_test(enable = "avx512f,avx512vl")]
55752    unsafe fn test_mm256_maskz_or_epi32() {
55753        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55754        let b = _mm256_set1_epi32(1 << 1);
55755        let r = _mm256_maskz_or_epi32(0, a, b);
55756        assert_eq_m256i(r, _mm256_setzero_si256());
55757        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
55758        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55759        assert_eq_m256i(r, e);
55760    }
55761
55762    #[simd_test(enable = "avx512f,avx512vl")]
55763    unsafe fn test_mm_or_epi32() {
55764        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55765        let b = _mm_set1_epi32(1 << 1);
55766        let r = _mm_or_epi32(a, b);
55767        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55768        assert_eq_m128i(r, e);
55769    }
55770
55771    #[simd_test(enable = "avx512f,avx512vl")]
55772    unsafe fn test_mm_mask_or_epi32() {
55773        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55774        let b = _mm_set1_epi32(1 << 1);
55775        let r = _mm_mask_or_epi32(a, 0, a, b);
55776        assert_eq_m128i(r, a);
55777        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
55778        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55779        assert_eq_m128i(r, e);
55780    }
55781
55782    #[simd_test(enable = "avx512f,avx512vl")]
55783    unsafe fn test_mm_maskz_or_epi32() {
55784        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55785        let b = _mm_set1_epi32(1 << 1);
55786        let r = _mm_maskz_or_epi32(0, a, b);
55787        assert_eq_m128i(r, _mm_setzero_si128());
55788        let r = _mm_maskz_or_epi32(0b00001111, a, b);
55789        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55790        assert_eq_m128i(r, e);
55791    }
55792
55793    #[simd_test(enable = "avx512f")]
55794    unsafe fn test_mm512_or_si512() {
55795        #[rustfmt::skip]
55796        let a = _mm512_set_epi32(
55797            1 << 1 | 1 << 2, 0, 0, 0,
55798            0, 0, 0, 0,
55799            0, 0, 0, 0,
55800            0, 0, 0, 1 << 1 | 1 << 3,
55801        );
55802        #[rustfmt::skip]
55803        let b = _mm512_set_epi32(
55804            1 << 1, 0, 0, 0,
55805            0, 0, 0, 0,
55806            0, 0, 0, 0,
55807            0, 0, 0, 1 << 3 | 1 << 4,
55808        );
55809        let r = _mm512_or_si512(a, b);
55810        #[rustfmt::skip]
55811        let e = _mm512_set_epi32(
55812            1 << 1 | 1 << 2, 0, 0, 0,
55813            0, 0, 0, 0,
55814            0, 0, 0, 0,
55815            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55816        );
55817        assert_eq_m512i(r, e);
55818    }
55819
55820    #[simd_test(enable = "avx512f")]
55821    unsafe fn test_mm512_xor_epi32() {
55822        #[rustfmt::skip]
55823        let a = _mm512_set_epi32(
55824            1 << 1 | 1 << 2, 0, 0, 0,
55825            0, 0, 0, 0,
55826            0, 0, 0, 0,
55827            0, 0, 0, 1 << 1 | 1 << 3,
55828        );
55829        #[rustfmt::skip]
55830        let b = _mm512_set_epi32(
55831            1 << 1, 0, 0, 0,
55832            0, 0, 0, 0,
55833            0, 0, 0, 0,
55834            0, 0, 0, 1 << 3 | 1 << 4,
55835        );
55836        let r = _mm512_xor_epi32(a, b);
55837        #[rustfmt::skip]
55838        let e = _mm512_set_epi32(
55839            1 << 2, 0, 0, 0,
55840            0, 0, 0, 0,
55841            0, 0, 0, 0,
55842            0, 0, 0, 1 << 1 | 1 << 4,
55843        );
55844        assert_eq_m512i(r, e);
55845    }
55846
55847    #[simd_test(enable = "avx512f")]
55848    unsafe fn test_mm512_mask_xor_epi32() {
55849        #[rustfmt::skip]
55850        let a = _mm512_set_epi32(
55851            1 << 1 | 1 << 2, 0, 0, 0,
55852            0, 0, 0, 0,
55853            0, 0, 0, 0,
55854            0, 0, 0, 1 << 1 | 1 << 3,
55855        );
55856        #[rustfmt::skip]
55857        let b = _mm512_set_epi32(
55858            1 << 1, 0, 0, 0,
55859            0, 0, 0, 0,
55860            0, 0, 0, 0,
55861            0, 0, 0, 1 << 3 | 1 << 4,
55862        );
55863        let r = _mm512_mask_xor_epi32(a, 0, a, b);
55864        assert_eq_m512i(r, a);
55865        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
55866        #[rustfmt::skip]
55867        let e = _mm512_set_epi32(
55868            1 << 1 | 1 << 2, 0, 0, 0,
55869            0, 0, 0, 0,
55870            0, 0, 0, 0,
55871            0, 0, 0, 1 << 1 | 1 << 4,
55872        );
55873        assert_eq_m512i(r, e);
55874    }
55875
55876    #[simd_test(enable = "avx512f")]
55877    unsafe fn test_mm512_maskz_xor_epi32() {
55878        #[rustfmt::skip]
55879        let a = _mm512_set_epi32(
55880            1 << 1 | 1 << 2, 0, 0, 0,
55881            0, 0, 0, 0,
55882            0, 0, 0, 0,
55883            0, 0, 0, 1 << 1 | 1 << 3,
55884        );
55885        #[rustfmt::skip]
55886        let b = _mm512_set_epi32(
55887            1 << 1, 0, 0, 0,
55888            0, 0, 0, 0,
55889            0, 0, 0, 0,
55890            0, 0, 0, 1 << 3 | 1 << 4,
55891        );
55892        let r = _mm512_maskz_xor_epi32(0, a, b);
55893        assert_eq_m512i(r, _mm512_setzero_si512());
55894        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
55895        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
55896        assert_eq_m512i(r, e);
55897    }
55898
55899    #[simd_test(enable = "avx512f,avx512vl")]
55900    unsafe fn test_mm256_xor_epi32() {
55901        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55902        let b = _mm256_set1_epi32(1 << 1);
55903        let r = _mm256_xor_epi32(a, b);
55904        let e = _mm256_set1_epi32(1 << 2);
55905        assert_eq_m256i(r, e);
55906    }
55907
55908    #[simd_test(enable = "avx512f,avx512vl")]
55909    unsafe fn test_mm256_mask_xor_epi32() {
55910        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55911        let b = _mm256_set1_epi32(1 << 1);
55912        let r = _mm256_mask_xor_epi32(a, 0, a, b);
55913        assert_eq_m256i(r, a);
55914        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
55915        let e = _mm256_set1_epi32(1 << 2);
55916        assert_eq_m256i(r, e);
55917    }
55918
55919    #[simd_test(enable = "avx512f,avx512vl")]
55920    unsafe fn test_mm256_maskz_xor_epi32() {
55921        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55922        let b = _mm256_set1_epi32(1 << 1);
55923        let r = _mm256_maskz_xor_epi32(0, a, b);
55924        assert_eq_m256i(r, _mm256_setzero_si256());
55925        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
55926        let e = _mm256_set1_epi32(1 << 2);
55927        assert_eq_m256i(r, e);
55928    }
55929
55930    #[simd_test(enable = "avx512f,avx512vl")]
55931    unsafe fn test_mm_xor_epi32() {
55932        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55933        let b = _mm_set1_epi32(1 << 1);
55934        let r = _mm_xor_epi32(a, b);
55935        let e = _mm_set1_epi32(1 << 2);
55936        assert_eq_m128i(r, e);
55937    }
55938
55939    #[simd_test(enable = "avx512f,avx512vl")]
55940    unsafe fn test_mm_mask_xor_epi32() {
55941        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55942        let b = _mm_set1_epi32(1 << 1);
55943        let r = _mm_mask_xor_epi32(a, 0, a, b);
55944        assert_eq_m128i(r, a);
55945        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
55946        let e = _mm_set1_epi32(1 << 2);
55947        assert_eq_m128i(r, e);
55948    }
55949
55950    #[simd_test(enable = "avx512f,avx512vl")]
55951    unsafe fn test_mm_maskz_xor_epi32() {
55952        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55953        let b = _mm_set1_epi32(1 << 1);
55954        let r = _mm_maskz_xor_epi32(0, a, b);
55955        assert_eq_m128i(r, _mm_setzero_si128());
55956        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
55957        let e = _mm_set1_epi32(1 << 2);
55958        assert_eq_m128i(r, e);
55959    }
55960
55961    #[simd_test(enable = "avx512f")]
55962    unsafe fn test_mm512_xor_si512() {
55963        #[rustfmt::skip]
55964        let a = _mm512_set_epi32(
55965            1 << 1 | 1 << 2, 0, 0, 0,
55966            0, 0, 0, 0,
55967            0, 0, 0, 0,
55968            0, 0, 0, 1 << 1 | 1 << 3,
55969        );
55970        #[rustfmt::skip]
55971        let b = _mm512_set_epi32(
55972            1 << 1, 0, 0, 0,
55973            0, 0, 0, 0,
55974            0, 0, 0, 0,
55975            0, 0, 0, 1 << 3 | 1 << 4,
55976        );
55977        let r = _mm512_xor_si512(a, b);
55978        #[rustfmt::skip]
55979        let e = _mm512_set_epi32(
55980            1 << 2, 0, 0, 0,
55981            0, 0, 0, 0,
55982            0, 0, 0, 0,
55983            0, 0, 0, 1 << 1 | 1 << 4,
55984        );
55985        assert_eq_m512i(r, e);
55986    }
55987
55988    #[simd_test(enable = "avx512f")]
55989    unsafe fn test_mm512_andnot_epi32() {
55990        let a = _mm512_set1_epi32(0);
55991        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55992        let r = _mm512_andnot_epi32(a, b);
55993        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55994        assert_eq_m512i(r, e);
55995    }
55996
55997    #[simd_test(enable = "avx512f")]
55998    unsafe fn test_mm512_mask_andnot_epi32() {
55999        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
56000        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
56001        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
56002        assert_eq_m512i(r, a);
56003        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
56004        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
56005        assert_eq_m512i(r, e);
56006    }
56007
56008    #[simd_test(enable = "avx512f")]
56009    unsafe fn test_mm512_maskz_andnot_epi32() {
56010        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
56011        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
56012        let r = _mm512_maskz_andnot_epi32(0, a, b);
56013        assert_eq_m512i(r, _mm512_setzero_si512());
56014        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
56015        #[rustfmt::skip]
56016        let e = _mm512_set_epi32(
56017            0, 0, 0, 0,
56018            0, 0, 0, 0,
56019            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
56020            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
56021        );
56022        assert_eq_m512i(r, e);
56023    }
56024
56025    #[simd_test(enable = "avx512f,avx512vl")]
56026    unsafe fn test_mm256_mask_andnot_epi32() {
56027        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
56028        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
56029        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
56030        assert_eq_m256i(r, a);
56031        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
56032        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
56033        assert_eq_m256i(r, e);
56034    }
56035
56036    #[simd_test(enable = "avx512f,avx512vl")]
56037    unsafe fn test_mm256_maskz_andnot_epi32() {
56038        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
56039        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
56040        let r = _mm256_maskz_andnot_epi32(0, a, b);
56041        assert_eq_m256i(r, _mm256_setzero_si256());
56042        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
56043        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
56044        assert_eq_m256i(r, e);
56045    }
56046
56047    #[simd_test(enable = "avx512f,avx512vl")]
56048    unsafe fn test_mm_mask_andnot_epi32() {
56049        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
56050        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
56051        let r = _mm_mask_andnot_epi32(a, 0, a, b);
56052        assert_eq_m128i(r, a);
56053        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
56054        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
56055        assert_eq_m128i(r, e);
56056    }
56057
56058    #[simd_test(enable = "avx512f,avx512vl")]
56059    unsafe fn test_mm_maskz_andnot_epi32() {
56060        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
56061        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
56062        let r = _mm_maskz_andnot_epi32(0, a, b);
56063        assert_eq_m128i(r, _mm_setzero_si128());
56064        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
56065        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
56066        assert_eq_m128i(r, e);
56067    }
56068
56069    #[simd_test(enable = "avx512f")]
56070    unsafe fn test_cvtmask16_u32() {
56071        let a: __mmask16 = 0b11001100_00110011;
56072        let r = _cvtmask16_u32(a);
56073        let e: u32 = 0b11001100_00110011;
56074        assert_eq!(r, e);
56075    }
56076
56077    #[simd_test(enable = "avx512f")]
56078    unsafe fn test_cvtu32_mask16() {
56079        let a: u32 = 0b11001100_00110011;
56080        let r = _cvtu32_mask16(a);
56081        let e: __mmask16 = 0b11001100_00110011;
56082        assert_eq!(r, e);
56083    }
56084
56085    #[simd_test(enable = "avx512f")]
56086    unsafe fn test_mm512_kand() {
56087        let a: u16 = 0b11001100_00110011;
56088        let b: u16 = 0b11001100_00110011;
56089        let r = _mm512_kand(a, b);
56090        let e: u16 = 0b11001100_00110011;
56091        assert_eq!(r, e);
56092    }
56093
56094    #[simd_test(enable = "avx512f")]
56095    unsafe fn test_kand_mask16() {
56096        let a: u16 = 0b11001100_00110011;
56097        let b: u16 = 0b11001100_00110011;
56098        let r = _kand_mask16(a, b);
56099        let e: u16 = 0b11001100_00110011;
56100        assert_eq!(r, e);
56101    }
56102
56103    #[simd_test(enable = "avx512f")]
56104    unsafe fn test_mm512_kor() {
56105        let a: u16 = 0b11001100_00110011;
56106        let b: u16 = 0b00101110_00001011;
56107        let r = _mm512_kor(a, b);
56108        let e: u16 = 0b11101110_00111011;
56109        assert_eq!(r, e);
56110    }
56111
56112    #[simd_test(enable = "avx512f")]
56113    unsafe fn test_kor_mask16() {
56114        let a: u16 = 0b11001100_00110011;
56115        let b: u16 = 0b00101110_00001011;
56116        let r = _kor_mask16(a, b);
56117        let e: u16 = 0b11101110_00111011;
56118        assert_eq!(r, e);
56119    }
56120
56121    #[simd_test(enable = "avx512f")]
56122    unsafe fn test_mm512_kxor() {
56123        let a: u16 = 0b11001100_00110011;
56124        let b: u16 = 0b00101110_00001011;
56125        let r = _mm512_kxor(a, b);
56126        let e: u16 = 0b11100010_00111000;
56127        assert_eq!(r, e);
56128    }
56129
56130    #[simd_test(enable = "avx512f")]
56131    unsafe fn test_kxor_mask16() {
56132        let a: u16 = 0b11001100_00110011;
56133        let b: u16 = 0b00101110_00001011;
56134        let r = _kxor_mask16(a, b);
56135        let e: u16 = 0b11100010_00111000;
56136        assert_eq!(r, e);
56137    }
56138
56139    #[simd_test(enable = "avx512f")]
56140    unsafe fn test_mm512_knot() {
56141        let a: u16 = 0b11001100_00110011;
56142        let r = _mm512_knot(a);
56143        let e: u16 = 0b00110011_11001100;
56144        assert_eq!(r, e);
56145    }
56146
56147    #[simd_test(enable = "avx512f")]
56148    unsafe fn test_knot_mask16() {
56149        let a: u16 = 0b11001100_00110011;
56150        let r = _knot_mask16(a);
56151        let e: u16 = 0b00110011_11001100;
56152        assert_eq!(r, e);
56153    }
56154
56155    #[simd_test(enable = "avx512f")]
56156    unsafe fn test_mm512_kandn() {
56157        let a: u16 = 0b11001100_00110011;
56158        let b: u16 = 0b00101110_00001011;
56159        let r = _mm512_kandn(a, b);
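             // kandn computes !a & b, so only bits set in b and clear in a survive.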
56160        let e: u16 = 0b00100010_00001000;
56161        assert_eq!(r, e);
56162    }
56163
56164    #[simd_test(enable = "avx512f")]
56165    unsafe fn test_kandn_mask16() {
56166        let a: u16 = 0b11001100_00110011;
56167        let b: u16 = 0b00101110_00001011;
56168        let r = _kandn_mask16(a, b);
56169        let e: u16 = 0b00100010_00001000;
56170        assert_eq!(r, e);
56171    }
56172
56173    #[simd_test(enable = "avx512f")]
56174    unsafe fn test_mm512_kxnor() {
56175        let a: u16 = 0b11001100_00110011;
56176        let b: u16 = 0b00101110_00001011;
56177        let r = _mm512_kxnor(a, b);
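             // kxnor is the complement of kxor: !(a ^ b).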
56178        let e: u16 = 0b00011101_11000111;
56179        assert_eq!(r, e);
56180    }
56181
56182    #[simd_test(enable = "avx512f")]
56183    unsafe fn test_kxnor_mask16() {
56184        let a: u16 = 0b11001100_00110011;
56185        let b: u16 = 0b00101110_00001011;
56186        let r = _kxnor_mask16(a, b);
56187        let e: u16 = 0b00011101_11000111;
56188        assert_eq!(r, e);
56189    }
56190
56191    #[simd_test(enable = "avx512dq")]
56192    unsafe fn test_kortest_mask16_u8() {
56193        let a: __mmask16 = 0b0110100101101001;
56194        let b: __mmask16 = 0b1011011010110110;
56195        let mut all_ones: u8 = 0;
56196        let r = _kortest_mask16_u8(a, b, &mut all_ones);
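             // a | b is all ones here, so all_ones is set to 1 while the returned all-zeros flag is 0.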
56197        assert_eq!(r, 0);
56198        assert_eq!(all_ones, 1);
56199    }
56200
56201    #[simd_test(enable = "avx512dq")]
56202    unsafe fn test_kortestc_mask16_u8() {
56203        let a: __mmask16 = 0b0110100101101001;
56204        let b: __mmask16 = 0b1011011010110110;
56205        let r = _kortestc_mask16_u8(a, b);
56206        assert_eq!(r, 1);
56207    }
56208
56209    #[simd_test(enable = "avx512dq")]
56210    unsafe fn test_kortestz_mask16_u8() {
56211        let a: __mmask16 = 0b0110100101101001;
56212        let b: __mmask16 = 0b1011011010110110;
56213        let r = _kortestz_mask16_u8(a, b);
56214        assert_eq!(r, 0);
56215    }
56216
56217    #[simd_test(enable = "avx512dq")]
56218    unsafe fn test_kshiftli_mask16() {
56219        let a: __mmask16 = 0b1001011011000011;
56220        let r = _kshiftli_mask16::<3>(a);
56221        let e: __mmask16 = 0b1011011000011000;
56222        assert_eq!(r, e);
56223    }
56224
56225    #[simd_test(enable = "avx512dq")]
56226    unsafe fn test_kshiftri_mask16() {
56227        let a: __mmask16 = 0b0110100100111100;
56228        let r = _kshiftri_mask16::<3>(a);
56229        let e: __mmask16 = 0b0000110100100111;
56230        assert_eq!(r, e);
56231    }
56232
56233    #[simd_test(enable = "avx512f")]
56234    unsafe fn test_load_mask16() {
56235        let a: __mmask16 = 0b1001011011000011;
56236        let r = _load_mask16(&a);
56237        let e: __mmask16 = 0b1001011011000011;
56238        assert_eq!(r, e);
56239    }
56240
56241    #[simd_test(enable = "avx512f")]
56242    unsafe fn test_store_mask16() {
56243        let a: __mmask16 = 0b0110100100111100;
56244        let mut r = 0;
56245        _store_mask16(&mut r, a);
56246        let e: __mmask16 = 0b0110100100111100;
56247        assert_eq!(r, e);
56248    }
56249
56250    #[simd_test(enable = "avx512f")]
56251    unsafe fn test_mm512_kmov() {
56252        let a: u16 = 0b11001100_00110011;
56253        let r = _mm512_kmov(a);
56254        let e: u16 = 0b11001100_00110011;
56255        assert_eq!(r, e);
56256    }
56257
56258    #[simd_test(enable = "avx512f")]
56259    unsafe fn test_mm512_int2mask() {
56260        let a: i32 = 0b11001100_00110011;
56261        let r = _mm512_int2mask(a);
56262        let e: u16 = 0b11001100_00110011;
56263        assert_eq!(r, e);
56264    }
56265
56266    #[simd_test(enable = "avx512f")]
56267    unsafe fn test_mm512_mask2int() {
56268        let k1: __mmask16 = 0b11001100_00110011;
56269        let r = _mm512_mask2int(k1);
56270        let e: i32 = 0b11001100_00110011;
56271        assert_eq!(r, e);
56272    }
56273
56274    #[simd_test(enable = "avx512f")]
56275    unsafe fn test_mm512_kunpackb() {
56276        let a: u16 = 0b11001100_00110011;
56277        let b: u16 = 0b00101110_00001011;
56278        let r = _mm512_kunpackb(a, b);
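             // The result keeps a's low byte in the upper half and b's low byte in the lower half.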
56279        let e: u16 = 0b00110011_00001011;
56280        assert_eq!(r, e);
56281    }
56282
56283    #[simd_test(enable = "avx512f")]
56284    unsafe fn test_mm512_kortestc() {
56285        let a: u16 = 0b11001100_00110011;
56286        let b: u16 = 0b00101110_00001011;
56287        let r = _mm512_kortestc(a, b);
56288        assert_eq!(r, 0);
56289        let b: u16 = 0b11111111_11111111;
56290        let r = _mm512_kortestc(a, b);
56291        assert_eq!(r, 1);
56292    }
56293
56294    #[simd_test(enable = "avx512f")]
56295    unsafe fn test_mm512_kortestz() {
56296        let a: u16 = 0b11001100_00110011;
56297        let b: u16 = 0b00101110_00001011;
56298        let r = _mm512_kortestz(a, b);
56299        assert_eq!(r, 0);
56300        let r = _mm512_kortestz(0, 0);
56301        assert_eq!(r, 1);
56302    }
56303
56304    #[simd_test(enable = "avx512f")]
56305    unsafe fn test_mm512_test_epi32_mask() {
56306        let a = _mm512_set1_epi32(1 << 0);
56307        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56308        let r = _mm512_test_epi32_mask(a, b);
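             // Each mask bit is set when a[i] & b[i] is non-zero; bit 0 overlaps in every lane.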
56309        let e: __mmask16 = 0b11111111_11111111;
56310        assert_eq!(r, e);
56311    }
56312
56313    #[simd_test(enable = "avx512f")]
56314    unsafe fn test_mm512_mask_test_epi32_mask() {
56315        let a = _mm512_set1_epi32(1 << 0);
56316        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56317        let r = _mm512_mask_test_epi32_mask(0, a, b);
56318        assert_eq!(r, 0);
56319        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
56320        let e: __mmask16 = 0b11111111_11111111;
56321        assert_eq!(r, e);
56322    }
56323
56324    #[simd_test(enable = "avx512f,avx512vl")]
56325    unsafe fn test_mm256_test_epi32_mask() {
56326        let a = _mm256_set1_epi32(1 << 0);
56327        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56328        let r = _mm256_test_epi32_mask(a, b);
56329        let e: __mmask8 = 0b11111111;
56330        assert_eq!(r, e);
56331    }
56332
56333    #[simd_test(enable = "avx512f,avx512vl")]
56334    unsafe fn test_mm256_mask_test_epi32_mask() {
56335        let a = _mm256_set1_epi32(1 << 0);
56336        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56337        let r = _mm256_mask_test_epi32_mask(0, a, b);
56338        assert_eq!(r, 0);
56339        let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
56340        let e: __mmask8 = 0b11111111;
56341        assert_eq!(r, e);
56342    }
56343
56344    #[simd_test(enable = "avx512f,avx512vl")]
56345    unsafe fn test_mm_test_epi32_mask() {
56346        let a = _mm_set1_epi32(1 << 0);
56347        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56348        let r = _mm_test_epi32_mask(a, b);
56349        let e: __mmask8 = 0b00001111;
56350        assert_eq!(r, e);
56351    }
56352
56353    #[simd_test(enable = "avx512f,avx512vl")]
56354    unsafe fn test_mm_mask_test_epi32_mask() {
56355        let a = _mm_set1_epi32(1 << 0);
56356        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56357        let r = _mm_mask_test_epi32_mask(0, a, b);
56358        assert_eq!(r, 0);
56359        let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
56360        let e: __mmask8 = 0b00001111;
56361        assert_eq!(r, e);
56362    }
56363
56364    #[simd_test(enable = "avx512f")]
56365    unsafe fn test_mm512_testn_epi32_mask() {
56366        let a = _mm512_set1_epi32(1 << 0);
56367        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56368        let r = _mm512_testn_epi32_mask(a, b);
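             // testn sets a mask bit only when a[i] & b[i] is zero; bit 0 overlaps in every lane, so no bits are set.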
56369        let e: __mmask16 = 0b00000000_00000000;
56370        assert_eq!(r, e);
56371    }
56372
56373    #[simd_test(enable = "avx512f")]
56374    unsafe fn test_mm512_mask_testn_epi32_mask() {
56375        let a = _mm512_set1_epi32(1 << 0);
56376        let b = _mm512_set1_epi32(1 << 1);
56377        let r = _mm512_mask_testn_epi32_mask(0, a, b);
56378        assert_eq!(r, 0);
56379        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
56380        let e: __mmask16 = 0b11111111_11111111;
56381        assert_eq!(r, e);
56382    }
56383
56384    #[simd_test(enable = "avx512f,avx512vl")]
56385    unsafe fn test_mm256_testn_epi32_mask() {
56386        let a = _mm256_set1_epi32(1 << 0);
56387        let b = _mm256_set1_epi32(1 << 1);
56388        let r = _mm256_testn_epi32_mask(a, b);
56389        let e: __mmask8 = 0b11111111;
56390        assert_eq!(r, e);
56391    }
56392
56393    #[simd_test(enable = "avx512f,avx512vl")]
56394    unsafe fn test_mm256_mask_testn_epi32_mask() {
56395        let a = _mm256_set1_epi32(1 << 0);
56396        let b = _mm256_set1_epi32(1 << 1);
56397        let r = _mm256_mask_testn_epi32_mask(0, a, b);
56398        assert_eq!(r, 0);
56399        let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
56400        let e: __mmask8 = 0b11111111;
56401        assert_eq!(r, e);
56402    }
56403
56404    #[simd_test(enable = "avx512f,avx512vl")]
56405    unsafe fn test_mm_testn_epi32_mask() {
56406        let a = _mm_set1_epi32(1 << 0);
56407        let b = _mm_set1_epi32(1 << 1);
56408        let r = _mm_testn_epi32_mask(a, b);
56409        let e: __mmask8 = 0b00001111;
56410        assert_eq!(r, e);
56411    }
56412
56413    #[simd_test(enable = "avx512f,avx512vl")]
56414    unsafe fn test_mm_mask_testn_epi32_mask() {
56415        let a = _mm_set1_epi32(1 << 0);
56416        let b = _mm_set1_epi32(1 << 1);
56417        let r = _mm_mask_testn_epi32_mask(0, a, b);
56418        assert_eq!(r, 0);
56419        let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
56420        let e: __mmask8 = 0b00001111;
56421        assert_eq!(r, e);
56422    }
56423
56424    #[simd_test(enable = "avx512f")]
56425    #[cfg_attr(miri, ignore)]
56426    unsafe fn test_mm512_stream_ps() {
56427        #[repr(align(64))]
56428        struct Memory {
56429            pub data: [f32; 16], // 64 bytes
56430        }
56431        let a = _mm512_set1_ps(7.0);
56432        let mut mem = Memory { data: [-1.0; 16] };
56433
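             // Non-temporal stores require a 64-byte aligned destination, hence the aligned Memory wrapper.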
56434        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
56435        for i in 0..16 {
56436            assert_eq!(mem.data[i], get_m512(a, i));
56437        }
56438    }
56439
56440    #[simd_test(enable = "avx512f")]
56441    #[cfg_attr(miri, ignore)]
56442    unsafe fn test_mm512_stream_pd() {
56443        #[repr(align(64))]
56444        struct Memory {
56445            pub data: [f64; 8],
56446        }
56447        let a = _mm512_set1_pd(7.0);
56448        let mut mem = Memory { data: [-1.0; 8] };
56449
56450        _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
56451        for i in 0..8 {
56452            assert_eq!(mem.data[i], get_m512d(a, i));
56453        }
56454    }
56455
56456    #[simd_test(enable = "avx512f")]
56457    #[cfg_attr(miri, ignore)]
56458    unsafe fn test_mm512_stream_si512() {
56459        #[repr(align(64))]
56460        struct Memory {
56461            pub data: [i64; 8],
56462        }
56463        let a = _mm512_set1_epi32(7);
56464        let mut mem = Memory { data: [-1; 8] };
56465
56466        _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
56467        for i in 0..8 {
56468            assert_eq!(mem.data[i], get_m512i(a, i));
56469        }
56470    }
56471
56472    #[simd_test(enable = "avx512f")]
56473    unsafe fn test_mm512_stream_load_si512() {
56474        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
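             // A __m512i local is 64-byte aligned, which satisfies the alignment requirement of the non-temporal load.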
56475        let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
56476        assert_eq_m512i(a, r);
56477    }
56478
56479    #[simd_test(enable = "avx512f")]
56480    unsafe fn test_mm512_reduce_add_epi32() {
56481        let a = _mm512_set1_epi32(1);
56482        let e: i32 = _mm512_reduce_add_epi32(a);
56483        assert_eq!(16, e);
56484    }
56485
56486    #[simd_test(enable = "avx512f")]
56487    unsafe fn test_mm512_mask_reduce_add_epi32() {
56488        let a = _mm512_set1_epi32(1);
56489        let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
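             // Only the upper eight lanes are selected, so eight ones are summed.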
56490        assert_eq!(8, e);
56491    }
56492
56493    #[simd_test(enable = "avx512f")]
56494    unsafe fn test_mm512_reduce_add_ps() {
56495        let a = _mm512_set1_ps(1.);
56496        let e: f32 = _mm512_reduce_add_ps(a);
56497        assert_eq!(16., e);
56498    }
56499
56500    #[simd_test(enable = "avx512f")]
56501    unsafe fn test_mm512_mask_reduce_add_ps() {
56502        let a = _mm512_set1_ps(1.);
56503        let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
56504        assert_eq!(8., e);
56505    }
56506
56507    #[simd_test(enable = "avx512f")]
56508    unsafe fn test_mm512_reduce_mul_epi32() {
56509        let a = _mm512_set1_epi32(2);
56510        let e: i32 = _mm512_reduce_mul_epi32(a);
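             // 2 multiplied across 16 lanes: 2^16 = 65536.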
56511        assert_eq!(65536, e);
56512    }
56513
56514    #[simd_test(enable = "avx512f")]
56515    unsafe fn test_mm512_mask_reduce_mul_epi32() {
56516        let a = _mm512_set1_epi32(2);
56517        let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
56518        assert_eq!(256, e);
56519    }
56520
56521    #[simd_test(enable = "avx512f")]
56522    unsafe fn test_mm512_reduce_mul_ps() {
56523        let a = _mm512_set1_ps(2.);
56524        let e: f32 = _mm512_reduce_mul_ps(a);
56525        assert_eq!(65536., e);
56526    }
56527
56528    #[simd_test(enable = "avx512f")]
56529    unsafe fn test_mm512_mask_reduce_mul_ps() {
56530        let a = _mm512_set1_ps(2.);
56531        let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
56532        assert_eq!(256., e);
56533    }
56534
56535    #[simd_test(enable = "avx512f")]
56536    unsafe fn test_mm512_reduce_max_epi32() {
56537        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56538        let e: i32 = _mm512_reduce_max_epi32(a);
56539        assert_eq!(15, e);
56540    }
56541
56542    #[simd_test(enable = "avx512f")]
56543    unsafe fn test_mm512_mask_reduce_max_epi32() {
56544        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56545        let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
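             // _mm512_set_epi32 lists lanes from 15 down to 0, so the selected upper lanes hold 0..=7.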
56546        assert_eq!(7, e);
56547    }
56548
56549    #[simd_test(enable = "avx512f")]
56550    unsafe fn test_mm512_reduce_max_epu32() {
56551        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56552        let e: u32 = _mm512_reduce_max_epu32(a);
56553        assert_eq!(15, e);
56554    }
56555
56556    #[simd_test(enable = "avx512f")]
56557    unsafe fn test_mm512_mask_reduce_max_epu32() {
56558        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56559        let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
56560        assert_eq!(7, e);
56561    }
56562
56563    #[simd_test(enable = "avx512f")]
56564    unsafe fn test_mm512_reduce_max_ps() {
56565        let a = _mm512_set_ps(
56566            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56567        );
56568        let e: f32 = _mm512_reduce_max_ps(a);
56569        assert_eq!(15., e);
56570    }
56571
56572    #[simd_test(enable = "avx512f")]
56573    unsafe fn test_mm512_mask_reduce_max_ps() {
56574        let a = _mm512_set_ps(
56575            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56576        );
56577        let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
56578        assert_eq!(7., e);
56579    }
56580
56581    #[simd_test(enable = "avx512f")]
56582    unsafe fn test_mm512_reduce_min_epi32() {
56583        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56584        let e: i32 = _mm512_reduce_min_epi32(a);
56585        assert_eq!(0, e);
56586    }
56587
56588    #[simd_test(enable = "avx512f")]
56589    unsafe fn test_mm512_mask_reduce_min_epi32() {
56590        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56591        let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
56592        assert_eq!(0, e);
56593    }
56594
56595    #[simd_test(enable = "avx512f")]
56596    unsafe fn test_mm512_reduce_min_epu32() {
56597        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56598        let e: u32 = _mm512_reduce_min_epu32(a);
56599        assert_eq!(0, e);
56600    }
56601
56602    #[simd_test(enable = "avx512f")]
56603    unsafe fn test_mm512_mask_reduce_min_epu32() {
56604        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56605        let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
56606        assert_eq!(0, e);
56607    }
56608
56609    #[simd_test(enable = "avx512f")]
56610    unsafe fn test_mm512_reduce_min_ps() {
56611        let a = _mm512_set_ps(
56612            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56613        );
56614        let e: f32 = _mm512_reduce_min_ps(a);
56615        assert_eq!(0., e);
56616    }
56617
56618    #[simd_test(enable = "avx512f")]
56619    unsafe fn test_mm512_mask_reduce_min_ps() {
56620        let a = _mm512_set_ps(
56621            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56622        );
56623        let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
56624        assert_eq!(0., e);
56625    }
56626
56627    #[simd_test(enable = "avx512f")]
56628    unsafe fn test_mm512_reduce_and_epi32() {
56629        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56630        let e: i32 = _mm512_reduce_and_epi32(a);
56631        assert_eq!(0, e);
56632    }
56633
56634    #[simd_test(enable = "avx512f")]
56635    unsafe fn test_mm512_mask_reduce_and_epi32() {
56636        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56637        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
56638        assert_eq!(1, e);
56639    }
56640
56641    #[simd_test(enable = "avx512f")]
56642    unsafe fn test_mm512_reduce_or_epi32() {
56643        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56644        let e: i32 = _mm512_reduce_or_epi32(a);
56645        assert_eq!(3, e);
56646    }
56647
56648    #[simd_test(enable = "avx512f")]
56649    unsafe fn test_mm512_mask_reduce_or_epi32() {
56650        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56651        let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
56652        assert_eq!(1, e);
56653    }
56654
56655    #[simd_test(enable = "avx512f")]
56656    unsafe fn test_mm512_mask_compress_epi32() {
56657        let src = _mm512_set1_epi32(200);
56658        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56659        let r = _mm512_mask_compress_epi32(src, 0, a);
56660        assert_eq_m512i(r, src);
56661        let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
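             // Elements at even lane indices are packed contiguously into the low lanes; the rest keep src (200).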
56662        let e = _mm512_set_epi32(
56663            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
56664        );
56665        assert_eq_m512i(r, e);
56666    }
56667
56668    #[simd_test(enable = "avx512f")]
56669    unsafe fn test_mm512_maskz_compress_epi32() {
56670        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56671        let r = _mm512_maskz_compress_epi32(0, a);
56672        assert_eq_m512i(r, _mm512_setzero_si512());
56673        let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
56674        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
56675        assert_eq_m512i(r, e);
56676    }
56677
56678    #[simd_test(enable = "avx512f,avx512vl")]
56679    unsafe fn test_mm256_mask_compress_epi32() {
56680        let src = _mm256_set1_epi32(200);
56681        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56682        let r = _mm256_mask_compress_epi32(src, 0, a);
56683        assert_eq_m256i(r, src);
56684        let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
56685        let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
56686        assert_eq_m256i(r, e);
56687    }
56688
56689    #[simd_test(enable = "avx512f,avx512vl")]
56690    unsafe fn test_mm256_maskz_compress_epi32() {
56691        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56692        let r = _mm256_maskz_compress_epi32(0, a);
56693        assert_eq_m256i(r, _mm256_setzero_si256());
56694        let r = _mm256_maskz_compress_epi32(0b01010101, a);
56695        let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
56696        assert_eq_m256i(r, e);
56697    }
56698
56699    #[simd_test(enable = "avx512f,avx512vl")]
56700    unsafe fn test_mm_mask_compress_epi32() {
56701        let src = _mm_set1_epi32(200);
56702        let a = _mm_set_epi32(0, 1, 2, 3);
56703        let r = _mm_mask_compress_epi32(src, 0, a);
56704        assert_eq_m128i(r, src);
56705        let r = _mm_mask_compress_epi32(src, 0b00000101, a);
56706        let e = _mm_set_epi32(200, 200, 1, 3);
56707        assert_eq_m128i(r, e);
56708    }
56709
56710    #[simd_test(enable = "avx512f,avx512vl")]
56711    unsafe fn test_mm_maskz_compress_epi32() {
56712        let a = _mm_set_epi32(0, 1, 2, 3);
56713        let r = _mm_maskz_compress_epi32(0, a);
56714        assert_eq_m128i(r, _mm_setzero_si128());
56715        let r = _mm_maskz_compress_epi32(0b00000101, a);
56716        let e = _mm_set_epi32(0, 0, 1, 3);
56717        assert_eq_m128i(r, e);
56718    }
56719
56720    #[simd_test(enable = "avx512f")]
56721    unsafe fn test_mm512_mask_compress_ps() {
56722        let src = _mm512_set1_ps(200.);
56723        let a = _mm512_set_ps(
56724            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56725        );
56726        let r = _mm512_mask_compress_ps(src, 0, a);
56727        assert_eq_m512(r, src);
56728        let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
56729        let e = _mm512_set_ps(
56730            200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
56731        );
56732        assert_eq_m512(r, e);
56733    }
56734
56735    #[simd_test(enable = "avx512f")]
56736    unsafe fn test_mm512_maskz_compress_ps() {
56737        let a = _mm512_set_ps(
56738            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56739        );
56740        let r = _mm512_maskz_compress_ps(0, a);
56741        assert_eq_m512(r, _mm512_setzero_ps());
56742        let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
56743        let e = _mm512_set_ps(
56744            0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
56745        );
56746        assert_eq_m512(r, e);
56747    }
56748
56749    #[simd_test(enable = "avx512f,avx512vl")]
56750    unsafe fn test_mm256_mask_compress_ps() {
56751        let src = _mm256_set1_ps(200.);
56752        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56753        let r = _mm256_mask_compress_ps(src, 0, a);
56754        assert_eq_m256(r, src);
56755        let r = _mm256_mask_compress_ps(src, 0b01010101, a);
56756        let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
56757        assert_eq_m256(r, e);
56758    }
56759
56760    #[simd_test(enable = "avx512f,avx512vl")]
56761    unsafe fn test_mm256_maskz_compress_ps() {
56762        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56763        let r = _mm256_maskz_compress_ps(0, a);
56764        assert_eq_m256(r, _mm256_setzero_ps());
56765        let r = _mm256_maskz_compress_ps(0b01010101, a);
56766        let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
56767        assert_eq_m256(r, e);
56768    }
56769
56770    #[simd_test(enable = "avx512f,avx512vl")]
56771    unsafe fn test_mm_mask_compress_ps() {
56772        let src = _mm_set1_ps(200.);
56773        let a = _mm_set_ps(0., 1., 2., 3.);
56774        let r = _mm_mask_compress_ps(src, 0, a);
56775        assert_eq_m128(r, src);
56776        let r = _mm_mask_compress_ps(src, 0b00000101, a);
56777        let e = _mm_set_ps(200., 200., 1., 3.);
56778        assert_eq_m128(r, e);
56779    }
56780
56781    #[simd_test(enable = "avx512f,avx512vl")]
56782    unsafe fn test_mm_maskz_compress_ps() {
56783        let a = _mm_set_ps(0., 1., 2., 3.);
56784        let r = _mm_maskz_compress_ps(0, a);
56785        assert_eq_m128(r, _mm_setzero_ps());
56786        let r = _mm_maskz_compress_ps(0b00000101, a);
56787        let e = _mm_set_ps(0., 0., 1., 3.);
56788        assert_eq_m128(r, e);
56789    }
56790
56791    #[simd_test(enable = "avx512f")]
56792    unsafe fn test_mm512_mask_compressstoreu_epi32() {
56793        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56794        let mut r = [0_i32; 16];
56795        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56796        assert_eq!(&r, &[0_i32; 16]);
56797        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
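             // The eight selected elements are stored contiguously from the start of r; the remaining slots stay zero.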
56798        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
56799    }
56800
56801    #[simd_test(enable = "avx512f,avx512vl")]
56802    unsafe fn test_mm256_mask_compressstoreu_epi32() {
56803        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56804        let mut r = [0_i32; 8];
56805        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56806        assert_eq!(&r, &[0_i32; 8]);
56807        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
56808        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56809    }
56810
56811    #[simd_test(enable = "avx512f,avx512vl")]
56812    unsafe fn test_mm_mask_compressstoreu_epi32() {
56813        let a = _mm_setr_epi32(1, 2, 3, 4);
56814        let mut r = [0_i32; 4];
56815        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56816        assert_eq!(&r, &[0_i32; 4]);
56817        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
56818        assert_eq!(&r, &[1, 2, 4, 0]);
56819    }
56820
56821    #[simd_test(enable = "avx512f")]
56822    unsafe fn test_mm512_mask_compressstoreu_epi64() {
56823        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56824        let mut r = [0_i64; 8];
56825        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56826        assert_eq!(&r, &[0_i64; 8]);
56827        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
56828        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56829    }
56830
56831    #[simd_test(enable = "avx512f,avx512vl")]
56832    unsafe fn test_mm256_mask_compressstoreu_epi64() {
56833        let a = _mm256_setr_epi64x(1, 2, 3, 4);
56834        let mut r = [0_i64; 4];
56835        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56836        assert_eq!(&r, &[0_i64; 4]);
56837        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
56838        assert_eq!(&r, &[1, 2, 4, 0]);
56839    }
56840
56841    #[simd_test(enable = "avx512f,avx512vl")]
56842    unsafe fn test_mm_mask_compressstoreu_epi64() {
56843        let a = _mm_setr_epi64x(1, 2);
56844        let mut r = [0_i64; 2];
56845        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56846        assert_eq!(&r, &[0_i64; 2]);
56847        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
56848        assert_eq!(&r, &[2, 0]);
56849    }
56850
56851    #[simd_test(enable = "avx512f")]
56852    unsafe fn test_mm512_mask_compressstoreu_ps() {
56853        let a = _mm512_setr_ps(
56854            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
56855            13_f32, 14_f32, 15_f32, 16_f32,
56856        );
56857        let mut r = [0_f32; 16];
56858        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56859        assert_eq!(&r, &[0_f32; 16]);
56860        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
56861        assert_eq!(
56862            &r,
56863            &[
56864                2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
56865                0_f32, 0_f32, 0_f32, 0_f32, 0_f32
56866            ]
56867        );
56868    }
56869
56870    #[simd_test(enable = "avx512f,avx512vl")]
56871    unsafe fn test_mm256_mask_compressstoreu_ps() {
56872        let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
56873        let mut r = [0_f32; 8];
56874        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56875        assert_eq!(&r, &[0_f32; 8]);
56876        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
56877        assert_eq!(
56878            &r,
56879            &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
56880        );
56881    }
56882
56883    #[simd_test(enable = "avx512f,avx512vl")]
56884    unsafe fn test_mm_mask_compressstoreu_ps() {
56885        let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
56886        let mut r = [0.; 4];
56887        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56888        assert_eq!(&r, &[0.; 4]);
56889        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
56890        assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
56891    }
56892
56893    #[simd_test(enable = "avx512f")]
56894    unsafe fn test_mm512_mask_compressstoreu_pd() {
56895        let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
56896        let mut r = [0.; 8];
56897        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56898        assert_eq!(&r, &[0.; 8]);
56899        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
56900        assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
56901    }
56902
56903    #[simd_test(enable = "avx512f,avx512vl")]
56904    unsafe fn test_mm256_mask_compressstoreu_pd() {
56905        let a = _mm256_setr_pd(1., 2., 3., 4.);
56906        let mut r = [0.; 4];
56907        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56908        assert_eq!(&r, &[0.; 4]);
56909        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
56910        assert_eq!(&r, &[1., 2., 4., 0.]);
56911    }
56912
56913    #[simd_test(enable = "avx512f,avx512vl")]
56914    unsafe fn test_mm_mask_compressstoreu_pd() {
56915        let a = _mm_setr_pd(1., 2.);
56916        let mut r = [0.; 2];
56917        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56918        assert_eq!(&r, &[0.; 2]);
56919        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
56920        assert_eq!(&r, &[2., 0.]);
56921    }
56922
56923    #[simd_test(enable = "avx512f")]
56924    unsafe fn test_mm512_mask_expand_epi32() {
56925        let src = _mm512_set1_epi32(200);
56926        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56927        let r = _mm512_mask_expand_epi32(src, 0, a);
56928        assert_eq_m512i(r, src);
56929        let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
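             // Consecutive low elements of a are scattered to the even destination lanes; odd lanes keep src (200).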
56930        let e = _mm512_set_epi32(
56931            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
56932        );
56933        assert_eq_m512i(r, e);
56934    }
56935
56936    #[simd_test(enable = "avx512f")]
56937    unsafe fn test_mm512_maskz_expand_epi32() {
56938        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56939        let r = _mm512_maskz_expand_epi32(0, a);
56940        assert_eq_m512i(r, _mm512_setzero_si512());
56941        let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
56942        let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
56943        assert_eq_m512i(r, e);
56944    }
56945
56946    #[simd_test(enable = "avx512f,avx512vl")]
56947    unsafe fn test_mm256_mask_expand_epi32() {
56948        let src = _mm256_set1_epi32(200);
56949        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56950        let r = _mm256_mask_expand_epi32(src, 0, a);
56951        assert_eq_m256i(r, src);
56952        let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
56953        let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
56954        assert_eq_m256i(r, e);
56955    }
56956
56957    #[simd_test(enable = "avx512f,avx512vl")]
56958    unsafe fn test_mm256_maskz_expand_epi32() {
56959        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56960        let r = _mm256_maskz_expand_epi32(0, a);
56961        assert_eq_m256i(r, _mm256_setzero_si256());
56962        let r = _mm256_maskz_expand_epi32(0b01010101, a);
56963        let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
56964        assert_eq_m256i(r, e);
56965    }
56966
56967    #[simd_test(enable = "avx512f,avx512vl")]
56968    unsafe fn test_mm_mask_expand_epi32() {
56969        let src = _mm_set1_epi32(200);
56970        let a = _mm_set_epi32(0, 1, 2, 3);
56971        let r = _mm_mask_expand_epi32(src, 0, a);
56972        assert_eq_m128i(r, src);
56973        let r = _mm_mask_expand_epi32(src, 0b00000101, a);
56974        let e = _mm_set_epi32(200, 2, 200, 3);
56975        assert_eq_m128i(r, e);
56976    }
56977
56978    #[simd_test(enable = "avx512f,avx512vl")]
56979    unsafe fn test_mm_maskz_expand_epi32() {
56980        let a = _mm_set_epi32(0, 1, 2, 3);
56981        let r = _mm_maskz_expand_epi32(0, a);
56982        assert_eq_m128i(r, _mm_setzero_si128());
56983        let r = _mm_maskz_expand_epi32(0b00000101, a);
56984        let e = _mm_set_epi32(0, 2, 0, 3);
56985        assert_eq_m128i(r, e);
56986    }
56987
56988    #[simd_test(enable = "avx512f")]
56989    unsafe fn test_mm512_mask_expand_ps() {
56990        let src = _mm512_set1_ps(200.);
56991        let a = _mm512_set_ps(
56992            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56993        );
56994        let r = _mm512_mask_expand_ps(src, 0, a);
56995        assert_eq_m512(r, src);
56996        let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
56997        let e = _mm512_set_ps(
56998            200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
56999        );
57000        assert_eq_m512(r, e);
57001    }
57002
57003    #[simd_test(enable = "avx512f")]
57004    unsafe fn test_mm512_maskz_expand_ps() {
57005        let a = _mm512_set_ps(
57006            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
57007        );
57008        let r = _mm512_maskz_expand_ps(0, a);
57009        assert_eq_m512(r, _mm512_setzero_ps());
57010        let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
57011        let e = _mm512_set_ps(
57012            0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
57013        );
57014        assert_eq_m512(r, e);
57015    }
57016
57017    #[simd_test(enable = "avx512f,avx512vl")]
57018    unsafe fn test_mm256_mask_expand_ps() {
57019        let src = _mm256_set1_ps(200.);
57020        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
57021        let r = _mm256_mask_expand_ps(src, 0, a);
57022        assert_eq_m256(r, src);
57023        let r = _mm256_mask_expand_ps(src, 0b01010101, a);
57024        let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
57025        assert_eq_m256(r, e);
57026    }
57027
57028    #[simd_test(enable = "avx512f,avx512vl")]
57029    unsafe fn test_mm256_maskz_expand_ps() {
57030        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
57031        let r = _mm256_maskz_expand_ps(0, a);
57032        assert_eq_m256(r, _mm256_setzero_ps());
57033        let r = _mm256_maskz_expand_ps(0b01010101, a);
57034        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
57035        assert_eq_m256(r, e);
57036    }
57037
57038    #[simd_test(enable = "avx512f,avx512vl")]
57039    unsafe fn test_mm_mask_expand_ps() {
57040        let src = _mm_set1_ps(200.);
57041        let a = _mm_set_ps(0., 1., 2., 3.);
57042        let r = _mm_mask_expand_ps(src, 0, a);
57043        assert_eq_m128(r, src);
57044        let r = _mm_mask_expand_ps(src, 0b00000101, a);
57045        let e = _mm_set_ps(200., 2., 200., 3.);
57046        assert_eq_m128(r, e);
57047    }
57048
57049    #[simd_test(enable = "avx512f,avx512vl")]
57050    unsafe fn test_mm_maskz_expand_ps() {
57051        let a = _mm_set_ps(0., 1., 2., 3.);
57052        let r = _mm_maskz_expand_ps(0, a);
57053        assert_eq_m128(r, _mm_setzero_ps());
57054        let r = _mm_maskz_expand_ps(0b00000101, a);
57055        let e = _mm_set_ps(0., 2., 0., 3.);
57056        assert_eq_m128(r, e);
57057    }
57058
57059    #[simd_test(enable = "avx512f")]
57060    unsafe fn test_mm512_loadu_epi32() {
57061        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57062        let p = a.as_ptr();
57063        let r = _mm512_loadu_epi32(black_box(p));
57064        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57065        assert_eq_m512i(r, e);
57066    }
57067
57068    #[simd_test(enable = "avx512f,avx512vl")]
57069    unsafe fn test_mm256_loadu_epi32() {
57070        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
57071        let p = a.as_ptr();
57072        let r = _mm256_loadu_epi32(black_box(p));
57073        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57074        assert_eq_m256i(r, e);
57075    }
57076
57077    #[simd_test(enable = "avx512f,avx512vl")]
57078    unsafe fn test_mm_loadu_epi32() {
57079        let a = &[4, 3, 2, 5];
57080        let p = a.as_ptr();
57081        let r = _mm_loadu_epi32(black_box(p));
57082        let e = _mm_setr_epi32(4, 3, 2, 5);
57083        assert_eq_m128i(r, e);
57084    }
57085
57086    #[simd_test(enable = "avx512f")]
57087    unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
57088        let a = _mm512_set1_epi32(9);
57089        let mut r = _mm256_undefined_si256();
57090        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57091        let e = _mm256_set1_epi16(9);
57092        assert_eq_m256i(r, e);
57093    }
57094
57095    #[simd_test(enable = "avx512f,avx512vl")]
57096    unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
57097        let a = _mm256_set1_epi32(9);
57098        let mut r = _mm_undefined_si128();
57099        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57100        let e = _mm_set1_epi16(9);
57101        assert_eq_m128i(r, e);
57102    }
57103
57104    #[simd_test(enable = "avx512f,avx512vl")]
57105    unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
57106        let a = _mm_set1_epi32(9);
57107        let mut r = _mm_set1_epi8(0);
57108        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57109        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
57110        assert_eq_m128i(r, e);
57111    }
57112
57113    #[simd_test(enable = "avx512f")]
57114    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
57115        let a = _mm512_set1_epi32(i32::MAX);
57116        let mut r = _mm256_undefined_si256();
57117        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
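             // Signed saturation clamps i32::MAX down to i16::MAX.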
57118        let e = _mm256_set1_epi16(i16::MAX);
57119        assert_eq_m256i(r, e);
57120    }
57121
57122    #[simd_test(enable = "avx512f,avx512vl")]
57123    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
57124        let a = _mm256_set1_epi32(i32::MAX);
57125        let mut r = _mm_undefined_si128();
57126        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57127        let e = _mm_set1_epi16(i16::MAX);
57128        assert_eq_m128i(r, e);
57129    }
57130
57131    #[simd_test(enable = "avx512f,avx512vl")]
57132    unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
57133        let a = _mm_set1_epi32(i32::MAX);
57134        let mut r = _mm_set1_epi8(0);
57135        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57136        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
57137        assert_eq_m128i(r, e);
57138    }
57139
57140    #[simd_test(enable = "avx512f")]
57141    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
57142        let a = _mm512_set1_epi32(i32::MAX);
57143        let mut r = _mm256_undefined_si256();
57144        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
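             // Unsigned saturation clamps i32::MAX to u16::MAX.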
57145        let e = _mm256_set1_epi16(u16::MAX as i16);
57146        assert_eq_m256i(r, e);
57147    }
57148
57149    #[simd_test(enable = "avx512f,avx512vl")]
57150    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
57151        let a = _mm256_set1_epi32(i32::MAX);
57152        let mut r = _mm_undefined_si128();
57153        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57154        let e = _mm_set1_epi16(u16::MAX as i16);
57155        assert_eq_m128i(r, e);
57156    }
57157
57158    #[simd_test(enable = "avx512f,avx512vl")]
57159    unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
57160        let a = _mm_set1_epi32(i32::MAX);
57161        let mut r = _mm_set1_epi8(0);
57162        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57163        let e = _mm_set_epi16(
57164            0,
57165            0,
57166            0,
57167            0,
57168            u16::MAX as i16,
57169            u16::MAX as i16,
57170            u16::MAX as i16,
57171            u16::MAX as i16,
57172        );
57173        assert_eq_m128i(r, e);
57174    }
57175
57176    #[simd_test(enable = "avx512f")]
57177    unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
57178        let a = _mm512_set1_epi32(9);
57179        let mut r = _mm_undefined_si128();
57180        _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57181        let e = _mm_set1_epi8(9);
57182        assert_eq_m128i(r, e);
57183    }
57184
57185    #[simd_test(enable = "avx512f,avx512vl")]
57186    unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
57187        let a = _mm256_set1_epi32(9);
57188        let mut r = _mm_set1_epi8(0);
57189        _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57190        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
57191        assert_eq_m128i(r, e);
57192    }
57193
57194    #[simd_test(enable = "avx512f,avx512vl")]
57195    unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
57196        let a = _mm_set1_epi32(9);
57197        let mut r = _mm_set1_epi8(0);
57198        _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57199        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
57200        assert_eq_m128i(r, e);
57201    }
57202
57203    #[simd_test(enable = "avx512f")]
57204    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
57205        let a = _mm512_set1_epi32(i32::MAX);
57206        let mut r = _mm_undefined_si128();
57207        _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57208        let e = _mm_set1_epi8(i8::MAX);
57209        assert_eq_m128i(r, e);
57210    }
57211
57212    #[simd_test(enable = "avx512f,avx512vl")]
57213    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
57214        let a = _mm256_set1_epi32(i32::MAX);
57215        let mut r = _mm_set1_epi8(0);
57216        _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57217        #[rustfmt::skip]
57218        let e = _mm_set_epi8(
57219            0, 0, 0, 0,
57220            0, 0, 0, 0,
57221            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57222            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57223        );
57224        assert_eq_m128i(r, e);
57225    }
57226
57227    #[simd_test(enable = "avx512f,avx512vl")]
57228    unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
57229        let a = _mm_set1_epi32(i32::MAX);
57230        let mut r = _mm_set1_epi8(0);
57231        _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57232        #[rustfmt::skip]
57233        let e = _mm_set_epi8(
57234            0, 0, 0, 0,
57235            0, 0, 0, 0,
57236            0, 0, 0, 0,
57237            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57238        );
57239        assert_eq_m128i(r, e);
57240    }
57241
57242    #[simd_test(enable = "avx512f")]
57243    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
57244        let a = _mm512_set1_epi32(i32::MAX);
57245        let mut r = _mm_undefined_si128();
57246        _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57247        let e = _mm_set1_epi8(u8::MAX as i8);
57248        assert_eq_m128i(r, e);
57249    }
57250
57251    #[simd_test(enable = "avx512f,avx512vl")]
57252    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
57253        let a = _mm256_set1_epi32(i32::MAX);
57254        let mut r = _mm_set1_epi8(0);
57255        _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57256        #[rustfmt::skip]
57257        let e = _mm_set_epi8(
57258            0, 0, 0, 0,
57259            0, 0, 0, 0,
57260            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57261            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57262        );
57263        assert_eq_m128i(r, e);
57264    }
57265
57266    #[simd_test(enable = "avx512f,avx512vl")]
57267    unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
57268        let a = _mm_set1_epi32(i32::MAX);
57269        let mut r = _mm_set1_epi8(0);
57270        _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57271        #[rustfmt::skip]
57272        let e = _mm_set_epi8(
57273            0, 0, 0, 0,
57274            0, 0, 0, 0,
57275            0, 0, 0, 0,
57276            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57277        );
57278        assert_eq_m128i(r, e);
57279    }
57280
57281    #[simd_test(enable = "avx512f")]
57282    unsafe fn test_mm512_storeu_epi32() {
57283        let a = _mm512_set1_epi32(9);
57284        let mut r = _mm512_undefined_epi32();
57285        _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57286        assert_eq_m512i(r, a);
57287    }
57288
57289    #[simd_test(enable = "avx512f,avx512vl")]
57290    unsafe fn test_mm256_storeu_epi32() {
57291        let a = _mm256_set1_epi32(9);
57292        let mut r = _mm256_undefined_si256();
57293        _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57294        assert_eq_m256i(r, a);
57295    }
57296
57297    #[simd_test(enable = "avx512f,avx512vl")]
57298    unsafe fn test_mm_storeu_epi32() {
57299        let a = _mm_set1_epi32(9);
57300        let mut r = _mm_undefined_si128();
57301        _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57302        assert_eq_m128i(r, a);
57303    }
57304
57305    #[simd_test(enable = "avx512f")]
57306    unsafe fn test_mm512_loadu_si512() {
57307        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57308        let p = a.as_ptr().cast();
57309        let r = _mm512_loadu_si512(black_box(p));
57310        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57311        assert_eq_m512i(r, e);
57312    }
57313
57314    #[simd_test(enable = "avx512f")]
57315    unsafe fn test_mm512_storeu_si512() {
57316        let a = _mm512_set1_epi32(9);
57317        let mut r = _mm512_undefined_epi32();
57318        _mm512_storeu_si512(&mut r as *mut _, a);
57319        assert_eq_m512i(r, a);
57320    }
57321
57322    #[simd_test(enable = "avx512f")]
57323    unsafe fn test_mm512_load_si512() {
57324        #[repr(align(64))]
57325        struct Align {
57326            data: [i32; 16], // 64 bytes
57327        }
57328        let a = Align {
57329            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57330        };
57331        let p = (a.data).as_ptr().cast();
57332        let r = _mm512_load_si512(black_box(p));
57333        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57334        assert_eq_m512i(r, e);
57335    }
57336
57337    #[simd_test(enable = "avx512f")]
57338    unsafe fn test_mm512_store_si512() {
57339        let a = _mm512_set1_epi32(9);
57340        let mut r = _mm512_undefined_epi32();
57341        _mm512_store_si512(&mut r as *mut _, a);
57342        assert_eq_m512i(r, a);
57343    }
57344
57345    #[simd_test(enable = "avx512f")]
57346    unsafe fn test_mm512_load_epi32() {
57347        #[repr(align(64))]
57348        struct Align {
57349            data: [i32; 16], // 64 bytes
57350        }
57351        let a = Align {
57352            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57353        };
57354        let p = (a.data).as_ptr();
57355        let r = _mm512_load_epi32(black_box(p));
57356        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57357        assert_eq_m512i(r, e);
57358    }
57359
57360    #[simd_test(enable = "avx512f,avx512vl")]
57361    unsafe fn test_mm256_load_epi32() {
57362        #[repr(align(64))]
57363        struct Align {
57364            data: [i32; 8],
57365        }
57366        let a = Align {
57367            data: [4, 3, 2, 5, 8, 9, 64, 50],
57368        };
57369        let p = (a.data).as_ptr();
57370        let r = _mm256_load_epi32(black_box(p));
57371        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57372        assert_eq_m256i(r, e);
57373    }
57374
57375    #[simd_test(enable = "avx512f,avx512vl")]
57376    unsafe fn test_mm_load_epi32() {
57377        #[repr(align(64))]
57378        struct Align {
57379            data: [i32; 4],
57380        }
57381        let a = Align { data: [4, 3, 2, 5] };
57382        let p = (a.data).as_ptr();
57383        let r = _mm_load_epi32(black_box(p));
57384        let e = _mm_setr_epi32(4, 3, 2, 5);
57385        assert_eq_m128i(r, e);
57386    }
57387
57388    #[simd_test(enable = "avx512f")]
57389    unsafe fn test_mm512_store_epi32() {
57390        let a = _mm512_set1_epi32(9);
57391        let mut r = _mm512_undefined_epi32();
57392        _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
57393        assert_eq_m512i(r, a);
57394    }
57395
57396    #[simd_test(enable = "avx512f,avx512vl")]
57397    unsafe fn test_mm256_store_epi32() {
57398        let a = _mm256_set1_epi32(9);
57399        let mut r = _mm256_undefined_si256();
57400        _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
57401        assert_eq_m256i(r, a);
57402    }
57403
57404    #[simd_test(enable = "avx512f,avx512vl")]
57405    unsafe fn test_mm_store_epi32() {
57406        let a = _mm_set1_epi32(9);
57407        let mut r = _mm_undefined_si128();
57408        _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
57409        assert_eq_m128i(r, a);
57410    }
57411
57412    #[simd_test(enable = "avx512f")]
57413    unsafe fn test_mm512_load_ps() {
57414        #[repr(align(64))]
57415        struct Align {
57416            data: [f32; 16], // 64 bytes
57417        }
57418        let a = Align {
57419            data: [
57420                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57421            ],
57422        };
57423        let p = (a.data).as_ptr();
57424        let r = _mm512_load_ps(black_box(p));
57425        let e = _mm512_setr_ps(
57426            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57427        );
57428        assert_eq_m512(r, e);
57429    }
57430
57431    #[simd_test(enable = "avx512f")]
57432    unsafe fn test_mm512_store_ps() {
57433        let a = _mm512_set1_ps(9.);
57434        let mut r = _mm512_undefined_ps();
57435        _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
57436        assert_eq_m512(r, a);
57437    }
57438
57439    #[simd_test(enable = "avx512f")]
57440    unsafe fn test_mm512_mask_set1_epi32() {
57441        let src = _mm512_set1_epi32(2);
57442        let a: i32 = 11;
57443        let r = _mm512_mask_set1_epi32(src, 0, a);
57444        assert_eq_m512i(r, src);
57445        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
57446        let e = _mm512_set1_epi32(11);
57447        assert_eq_m512i(r, e);
57448    }
57449
57450    #[simd_test(enable = "avx512f")]
57451    unsafe fn test_mm512_maskz_set1_epi32() {
57452        let a: i32 = 11;
57453        let r = _mm512_maskz_set1_epi32(0, a);
57454        assert_eq_m512i(r, _mm512_setzero_si512());
57455        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
57456        let e = _mm512_set1_epi32(11);
57457        assert_eq_m512i(r, e);
57458    }
57459
57460    #[simd_test(enable = "avx512f,avx512vl")]
57461    unsafe fn test_mm256_mask_set1_epi32() {
57462        let src = _mm256_set1_epi32(2);
57463        let a: i32 = 11;
57464        let r = _mm256_mask_set1_epi32(src, 0, a);
57465        assert_eq_m256i(r, src);
57466        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
57467        let e = _mm256_set1_epi32(11);
57468        assert_eq_m256i(r, e);
57469    }
57470
57471    #[simd_test(enable = "avx512f,avx512vl")]
57472    unsafe fn test_mm256_maskz_set1_epi32() {
57473        let a: i32 = 11;
57474        let r = _mm256_maskz_set1_epi32(0, a);
57475        assert_eq_m256i(r, _mm256_setzero_si256());
57476        let r = _mm256_maskz_set1_epi32(0b11111111, a);
57477        let e = _mm256_set1_epi32(11);
57478        assert_eq_m256i(r, e);
57479    }
57480
57481    #[simd_test(enable = "avx512f,avx512vl")]
57482    unsafe fn test_mm_mask_set1_epi32() {
57483        let src = _mm_set1_epi32(2);
57484        let a: i32 = 11;
57485        let r = _mm_mask_set1_epi32(src, 0, a);
57486        assert_eq_m128i(r, src);
57487        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
57488        let e = _mm_set1_epi32(11);
57489        assert_eq_m128i(r, e);
57490    }
57491
57492    #[simd_test(enable = "avx512f,avx512vl")]
57493    unsafe fn test_mm_maskz_set1_epi32() {
57494        let a: i32 = 11;
57495        let r = _mm_maskz_set1_epi32(0, a);
57496        assert_eq_m128i(r, _mm_setzero_si128());
57497        let r = _mm_maskz_set1_epi32(0b00001111, a);
57498        let e = _mm_set1_epi32(11);
57499        assert_eq_m128i(r, e);
57500    }
57501
57502    #[simd_test(enable = "avx512f")]
57503    unsafe fn test_mm_mask_move_ss() {
57504        let src = _mm_set_ps(10., 11., 100., 110.);
57505        let a = _mm_set_ps(1., 2., 10., 20.);
57506        let b = _mm_set_ps(3., 4., 30., 40.);
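        // The low lane comes from `b` when the mask bit is set and from `src` otherwise;
        // the upper three lanes are always taken from `a`.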
57507        let r = _mm_mask_move_ss(src, 0, a, b);
57508        let e = _mm_set_ps(1., 2., 10., 110.);
57509        assert_eq_m128(r, e);
57510        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
57511        let e = _mm_set_ps(1., 2., 10., 40.);
57512        assert_eq_m128(r, e);
57513    }
57514
57515    #[simd_test(enable = "avx512f")]
57516    unsafe fn test_mm_maskz_move_ss() {
57517        let a = _mm_set_ps(1., 2., 10., 20.);
57518        let b = _mm_set_ps(3., 4., 30., 40.);
57519        let r = _mm_maskz_move_ss(0, a, b);
57520        let e = _mm_set_ps(1., 2., 10., 0.);
57521        assert_eq_m128(r, e);
57522        let r = _mm_maskz_move_ss(0b11111111, a, b);
57523        let e = _mm_set_ps(1., 2., 10., 40.);
57524        assert_eq_m128(r, e);
57525    }
57526
57527    #[simd_test(enable = "avx512f")]
57528    unsafe fn test_mm_mask_move_sd() {
57529        let src = _mm_set_pd(10., 11.);
57530        let a = _mm_set_pd(1., 2.);
57531        let b = _mm_set_pd(3., 4.);
57532        let r = _mm_mask_move_sd(src, 0, a, b);
57533        let e = _mm_set_pd(1., 11.);
57534        assert_eq_m128d(r, e);
57535        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
57536        let e = _mm_set_pd(1., 4.);
57537        assert_eq_m128d(r, e);
57538    }
57539
57540    #[simd_test(enable = "avx512f")]
57541    unsafe fn test_mm_maskz_move_sd() {
57542        let a = _mm_set_pd(1., 2.);
57543        let b = _mm_set_pd(3., 4.);
57544        let r = _mm_maskz_move_sd(0, a, b);
57545        let e = _mm_set_pd(1., 0.);
57546        assert_eq_m128d(r, e);
57547        let r = _mm_maskz_move_sd(0b11111111, a, b);
57548        let e = _mm_set_pd(1., 4.);
57549        assert_eq_m128d(r, e);
57550    }
57551
57552    #[simd_test(enable = "avx512f")]
57553    unsafe fn test_mm_mask_add_ss() {
57554        let src = _mm_set_ps(10., 11., 100., 110.);
57555        let a = _mm_set_ps(1., 2., 10., 20.);
57556        let b = _mm_set_ps(3., 4., 30., 40.);
57557        let r = _mm_mask_add_ss(src, 0, a, b);
57558        let e = _mm_set_ps(1., 2., 10., 110.);
57559        assert_eq_m128(r, e);
57560        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
57561        let e = _mm_set_ps(1., 2., 10., 60.);
57562        assert_eq_m128(r, e);
57563    }
57564
57565    #[simd_test(enable = "avx512f")]
57566    unsafe fn test_mm_maskz_add_ss() {
57567        let a = _mm_set_ps(1., 2., 10., 20.);
57568        let b = _mm_set_ps(3., 4., 30., 40.);
57569        let r = _mm_maskz_add_ss(0, a, b);
57570        let e = _mm_set_ps(1., 2., 10., 0.);
57571        assert_eq_m128(r, e);
57572        let r = _mm_maskz_add_ss(0b11111111, a, b);
57573        let e = _mm_set_ps(1., 2., 10., 60.);
57574        assert_eq_m128(r, e);
57575    }
57576
57577    #[simd_test(enable = "avx512f")]
57578    unsafe fn test_mm_mask_add_sd() {
57579        let src = _mm_set_pd(10., 11.);
57580        let a = _mm_set_pd(1., 2.);
57581        let b = _mm_set_pd(3., 4.);
57582        let r = _mm_mask_add_sd(src, 0, a, b);
57583        let e = _mm_set_pd(1., 11.);
57584        assert_eq_m128d(r, e);
57585        let r = _mm_mask_add_sd(src, 0b11111111, a, b);
57586        let e = _mm_set_pd(1., 6.);
57587        assert_eq_m128d(r, e);
57588    }
57589
57590    #[simd_test(enable = "avx512f")]
57591    unsafe fn test_mm_maskz_add_sd() {
57592        let a = _mm_set_pd(1., 2.);
57593        let b = _mm_set_pd(3., 4.);
57594        let r = _mm_maskz_add_sd(0, a, b);
57595        let e = _mm_set_pd(1., 0.);
57596        assert_eq_m128d(r, e);
57597        let r = _mm_maskz_add_sd(0b11111111, a, b);
57598        let e = _mm_set_pd(1., 6.);
57599        assert_eq_m128d(r, e);
57600    }
57601
57602    #[simd_test(enable = "avx512f")]
57603    unsafe fn test_mm_mask_sub_ss() {
57604        let src = _mm_set_ps(10., 11., 100., 110.);
57605        let a = _mm_set_ps(1., 2., 10., 20.);
57606        let b = _mm_set_ps(3., 4., 30., 40.);
57607        let r = _mm_mask_sub_ss(src, 0, a, b);
57608        let e = _mm_set_ps(1., 2., 10., 110.);
57609        assert_eq_m128(r, e);
57610        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
57611        let e = _mm_set_ps(1., 2., 10., -20.);
57612        assert_eq_m128(r, e);
57613    }
57614
57615    #[simd_test(enable = "avx512f")]
57616    unsafe fn test_mm_maskz_sub_ss() {
57617        let a = _mm_set_ps(1., 2., 10., 20.);
57618        let b = _mm_set_ps(3., 4., 30., 40.);
57619        let r = _mm_maskz_sub_ss(0, a, b);
57620        let e = _mm_set_ps(1., 2., 10., 0.);
57621        assert_eq_m128(r, e);
57622        let r = _mm_maskz_sub_ss(0b11111111, a, b);
57623        let e = _mm_set_ps(1., 2., 10., -20.);
57624        assert_eq_m128(r, e);
57625    }
57626
57627    #[simd_test(enable = "avx512f")]
57628    unsafe fn test_mm_mask_sub_sd() {
57629        let src = _mm_set_pd(10., 11.);
57630        let a = _mm_set_pd(1., 2.);
57631        let b = _mm_set_pd(3., 4.);
57632        let r = _mm_mask_sub_sd(src, 0, a, b);
57633        let e = _mm_set_pd(1., 11.);
57634        assert_eq_m128d(r, e);
57635        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
57636        let e = _mm_set_pd(1., -2.);
57637        assert_eq_m128d(r, e);
57638    }
57639
57640    #[simd_test(enable = "avx512f")]
57641    unsafe fn test_mm_maskz_sub_sd() {
57642        let a = _mm_set_pd(1., 2.);
57643        let b = _mm_set_pd(3., 4.);
57644        let r = _mm_maskz_sub_sd(0, a, b);
57645        let e = _mm_set_pd(1., 0.);
57646        assert_eq_m128d(r, e);
57647        let r = _mm_maskz_sub_sd(0b11111111, a, b);
57648        let e = _mm_set_pd(1., -2.);
57649        assert_eq_m128d(r, e);
57650    }
57651
57652    #[simd_test(enable = "avx512f")]
57653    unsafe fn test_mm_mask_mul_ss() {
57654        let src = _mm_set_ps(10., 11., 100., 110.);
57655        let a = _mm_set_ps(1., 2., 10., 20.);
57656        let b = _mm_set_ps(3., 4., 30., 40.);
57657        let r = _mm_mask_mul_ss(src, 0, a, b);
57658        let e = _mm_set_ps(1., 2., 10., 110.);
57659        assert_eq_m128(r, e);
57660        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
57661        let e = _mm_set_ps(1., 2., 10., 800.);
57662        assert_eq_m128(r, e);
57663    }
57664
57665    #[simd_test(enable = "avx512f")]
57666    unsafe fn test_mm_maskz_mul_ss() {
57667        let a = _mm_set_ps(1., 2., 10., 20.);
57668        let b = _mm_set_ps(3., 4., 30., 40.);
57669        let r = _mm_maskz_mul_ss(0, a, b);
57670        let e = _mm_set_ps(1., 2., 10., 0.);
57671        assert_eq_m128(r, e);
57672        let r = _mm_maskz_mul_ss(0b11111111, a, b);
57673        let e = _mm_set_ps(1., 2., 10., 800.);
57674        assert_eq_m128(r, e);
57675    }
57676
57677    #[simd_test(enable = "avx512f")]
57678    unsafe fn test_mm_mask_mul_sd() {
57679        let src = _mm_set_pd(10., 11.);
57680        let a = _mm_set_pd(1., 2.);
57681        let b = _mm_set_pd(3., 4.);
57682        let r = _mm_mask_mul_sd(src, 0, a, b);
57683        let e = _mm_set_pd(1., 11.);
57684        assert_eq_m128d(r, e);
57685        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
57686        let e = _mm_set_pd(1., 8.);
57687        assert_eq_m128d(r, e);
57688    }
57689
57690    #[simd_test(enable = "avx512f")]
57691    unsafe fn test_mm_maskz_mul_sd() {
57692        let a = _mm_set_pd(1., 2.);
57693        let b = _mm_set_pd(3., 4.);
57694        let r = _mm_maskz_mul_sd(0, a, b);
57695        let e = _mm_set_pd(1., 0.);
57696        assert_eq_m128d(r, e);
57697        let r = _mm_maskz_mul_sd(0b11111111, a, b);
57698        let e = _mm_set_pd(1., 8.);
57699        assert_eq_m128d(r, e);
57700    }
57701
57702    #[simd_test(enable = "avx512f")]
57703    unsafe fn test_mm_mask_div_ss() {
57704        let src = _mm_set_ps(10., 11., 100., 110.);
57705        let a = _mm_set_ps(1., 2., 10., 20.);
57706        let b = _mm_set_ps(3., 4., 30., 40.);
57707        let r = _mm_mask_div_ss(src, 0, a, b);
57708        let e = _mm_set_ps(1., 2., 10., 110.);
57709        assert_eq_m128(r, e);
57710        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
57711        let e = _mm_set_ps(1., 2., 10., 0.5);
57712        assert_eq_m128(r, e);
57713    }
57714
57715    #[simd_test(enable = "avx512f")]
57716    unsafe fn test_mm_maskz_div_ss() {
57717        let a = _mm_set_ps(1., 2., 10., 20.);
57718        let b = _mm_set_ps(3., 4., 30., 40.);
57719        let r = _mm_maskz_div_ss(0, a, b);
57720        let e = _mm_set_ps(1., 2., 10., 0.);
57721        assert_eq_m128(r, e);
57722        let r = _mm_maskz_div_ss(0b11111111, a, b);
57723        let e = _mm_set_ps(1., 2., 10., 0.5);
57724        assert_eq_m128(r, e);
57725    }
57726
57727    #[simd_test(enable = "avx512f")]
57728    unsafe fn test_mm_mask_div_sd() {
57729        let src = _mm_set_pd(10., 11.);
57730        let a = _mm_set_pd(1., 2.);
57731        let b = _mm_set_pd(3., 4.);
57732        let r = _mm_mask_div_sd(src, 0, a, b);
57733        let e = _mm_set_pd(1., 11.);
57734        assert_eq_m128d(r, e);
57735        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
57736        let e = _mm_set_pd(1., 0.5);
57737        assert_eq_m128d(r, e);
57738    }
57739
57740    #[simd_test(enable = "avx512f")]
57741    unsafe fn test_mm_maskz_div_sd() {
57742        let a = _mm_set_pd(1., 2.);
57743        let b = _mm_set_pd(3., 4.);
57744        let r = _mm_maskz_div_sd(0, a, b);
57745        let e = _mm_set_pd(1., 0.);
57746        assert_eq_m128d(r, e);
57747        let r = _mm_maskz_div_sd(0b11111111, a, b);
57748        let e = _mm_set_pd(1., 0.5);
57749        assert_eq_m128d(r, e);
57750    }
57751
57752    #[simd_test(enable = "avx512f")]
57753    unsafe fn test_mm_mask_max_ss() {
57754        let a = _mm_set_ps(0., 1., 2., 3.);
57755        let b = _mm_set_ps(4., 5., 6., 7.);
57756        let r = _mm_mask_max_ss(a, 0, a, b);
57757        let e = _mm_set_ps(0., 1., 2., 3.);
57758        assert_eq_m128(r, e);
57759        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
57760        let e = _mm_set_ps(0., 1., 2., 7.);
57761        assert_eq_m128(r, e);
57762    }
57763
57764    #[simd_test(enable = "avx512f")]
57765    unsafe fn test_mm_maskz_max_ss() {
57766        let a = _mm_set_ps(0., 1., 2., 3.);
57767        let b = _mm_set_ps(4., 5., 6., 7.);
57768        let r = _mm_maskz_max_ss(0, a, b);
57769        let e = _mm_set_ps(0., 1., 2., 0.);
57770        assert_eq_m128(r, e);
57771        let r = _mm_maskz_max_ss(0b11111111, a, b);
57772        let e = _mm_set_ps(0., 1., 2., 7.);
57773        assert_eq_m128(r, e);
57774    }
57775
57776    #[simd_test(enable = "avx512f")]
57777    unsafe fn test_mm_mask_max_sd() {
57778        let a = _mm_set_pd(0., 1.);
57779        let b = _mm_set_pd(2., 3.);
57780        let r = _mm_mask_max_sd(a, 0, a, b);
57781        let e = _mm_set_pd(0., 1.);
57782        assert_eq_m128d(r, e);
57783        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
57784        let e = _mm_set_pd(0., 3.);
57785        assert_eq_m128d(r, e);
57786    }
57787
57788    #[simd_test(enable = "avx512f")]
57789    unsafe fn test_mm_maskz_max_sd() {
57790        let a = _mm_set_pd(0., 1.);
57791        let b = _mm_set_pd(2., 3.);
57792        let r = _mm_maskz_max_sd(0, a, b);
57793        let e = _mm_set_pd(0., 0.);
57794        assert_eq_m128d(r, e);
57795        let r = _mm_maskz_max_sd(0b11111111, a, b);
57796        let e = _mm_set_pd(0., 3.);
57797        assert_eq_m128d(r, e);
57798    }
57799
57800    #[simd_test(enable = "avx512f")]
57801    unsafe fn test_mm_mask_min_ss() {
57802        let a = _mm_set_ps(0., 1., 2., 3.);
57803        let b = _mm_set_ps(4., 5., 6., 7.);
57804        let r = _mm_mask_min_ss(a, 0, a, b);
57805        let e = _mm_set_ps(0., 1., 2., 3.);
57806        assert_eq_m128(r, e);
57807        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
57808        let e = _mm_set_ps(0., 1., 2., 3.);
57809        assert_eq_m128(r, e);
57810    }
57811
57812    #[simd_test(enable = "avx512f")]
57813    unsafe fn test_mm_maskz_min_ss() {
57814        let a = _mm_set_ps(0., 1., 2., 3.);
57815        let b = _mm_set_ps(4., 5., 6., 7.);
57816        let r = _mm_maskz_min_ss(0, a, b);
57817        let e = _mm_set_ps(0., 1., 2., 0.);
57818        assert_eq_m128(r, e);
57819        let r = _mm_maskz_min_ss(0b11111111, a, b);
57820        let e = _mm_set_ps(0., 1., 2., 3.);
57821        assert_eq_m128(r, e);
57822    }
57823
57824    #[simd_test(enable = "avx512f")]
57825    unsafe fn test_mm_mask_min_sd() {
57826        let a = _mm_set_pd(0., 1.);
57827        let b = _mm_set_pd(2., 3.);
57828        let r = _mm_mask_min_sd(a, 0, a, b);
57829        let e = _mm_set_pd(0., 1.);
57830        assert_eq_m128d(r, e);
57831        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
57832        let e = _mm_set_pd(0., 1.);
57833        assert_eq_m128d(r, e);
57834    }
57835
57836    #[simd_test(enable = "avx512f")]
57837    unsafe fn test_mm_maskz_min_sd() {
57838        let a = _mm_set_pd(0., 1.);
57839        let b = _mm_set_pd(2., 3.);
57840        let r = _mm_maskz_min_sd(0, a, b);
57841        let e = _mm_set_pd(0., 0.);
57842        assert_eq_m128d(r, e);
57843        let r = _mm_maskz_min_sd(0b11111111, a, b);
57844        let e = _mm_set_pd(0., 1.);
57845        assert_eq_m128d(r, e);
57846    }
57847
57848    #[simd_test(enable = "avx512f")]
57849    unsafe fn test_mm_mask_sqrt_ss() {
57850        let src = _mm_set_ps(10., 11., 100., 110.);
57851        let a = _mm_set_ps(1., 2., 10., 20.);
57852        let b = _mm_set_ps(3., 4., 30., 4.);
57853        let r = _mm_mask_sqrt_ss(src, 0, a, b);
57854        let e = _mm_set_ps(1., 2., 10., 110.);
57855        assert_eq_m128(r, e);
57856        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
57857        let e = _mm_set_ps(1., 2., 10., 2.);
57858        assert_eq_m128(r, e);
57859    }
57860
57861    #[simd_test(enable = "avx512f")]
57862    unsafe fn test_mm_maskz_sqrt_ss() {
57863        let a = _mm_set_ps(1., 2., 10., 20.);
57864        let b = _mm_set_ps(3., 4., 30., 4.);
57865        let r = _mm_maskz_sqrt_ss(0, a, b);
57866        let e = _mm_set_ps(1., 2., 10., 0.);
57867        assert_eq_m128(r, e);
57868        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
57869        let e = _mm_set_ps(1., 2., 10., 2.);
57870        assert_eq_m128(r, e);
57871    }
57872
57873    #[simd_test(enable = "avx512f")]
57874    unsafe fn test_mm_mask_sqrt_sd() {
57875        let src = _mm_set_pd(10., 11.);
57876        let a = _mm_set_pd(1., 2.);
57877        let b = _mm_set_pd(3., 4.);
57878        let r = _mm_mask_sqrt_sd(src, 0, a, b);
57879        let e = _mm_set_pd(1., 11.);
57880        assert_eq_m128d(r, e);
57881        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
57882        let e = _mm_set_pd(1., 2.);
57883        assert_eq_m128d(r, e);
57884    }
57885
57886    #[simd_test(enable = "avx512f")]
57887    unsafe fn test_mm_maskz_sqrt_sd() {
57888        let a = _mm_set_pd(1., 2.);
57889        let b = _mm_set_pd(3., 4.);
57890        let r = _mm_maskz_sqrt_sd(0, a, b);
57891        let e = _mm_set_pd(1., 0.);
57892        assert_eq_m128d(r, e);
57893        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
57894        let e = _mm_set_pd(1., 2.);
57895        assert_eq_m128d(r, e);
57896    }
57897
57898    #[simd_test(enable = "avx512f")]
57899    unsafe fn test_mm_rsqrt14_ss() {
57900        let a = _mm_set_ps(1., 2., 10., 20.);
57901        let b = _mm_set_ps(3., 4., 30., 4.);
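        // The low lane approximates 1/sqrt(4.) with a max relative error of 2^-14,
        // which here is exactly 0.5; the upper lanes are copied from `a`.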
57902        let r = _mm_rsqrt14_ss(a, b);
57903        let e = _mm_set_ps(1., 2., 10., 0.5);
57904        assert_eq_m128(r, e);
57905    }
57906
57907    #[simd_test(enable = "avx512f")]
57908    unsafe fn test_mm_mask_rsqrt14_ss() {
57909        let src = _mm_set_ps(10., 11., 100., 110.);
57910        let a = _mm_set_ps(1., 2., 10., 20.);
57911        let b = _mm_set_ps(3., 4., 30., 4.);
57912        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
57913        let e = _mm_set_ps(1., 2., 10., 110.);
57914        assert_eq_m128(r, e);
57915        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
57916        let e = _mm_set_ps(1., 2., 10., 0.5);
57917        assert_eq_m128(r, e);
57918    }
57919
57920    #[simd_test(enable = "avx512f")]
57921    unsafe fn test_mm_maskz_rsqrt14_ss() {
57922        let a = _mm_set_ps(1., 2., 10., 20.);
57923        let b = _mm_set_ps(3., 4., 30., 4.);
57924        let r = _mm_maskz_rsqrt14_ss(0, a, b);
57925        let e = _mm_set_ps(1., 2., 10., 0.);
57926        assert_eq_m128(r, e);
57927        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
57928        let e = _mm_set_ps(1., 2., 10., 0.5);
57929        assert_eq_m128(r, e);
57930    }
57931
57932    #[simd_test(enable = "avx512f")]
57933    unsafe fn test_mm_rsqrt14_sd() {
57934        let a = _mm_set_pd(1., 2.);
57935        let b = _mm_set_pd(3., 4.);
57936        let r = _mm_rsqrt14_sd(a, b);
57937        let e = _mm_set_pd(1., 0.5);
57938        assert_eq_m128d(r, e);
57939    }
57940
57941    #[simd_test(enable = "avx512f")]
57942    unsafe fn test_mm_mask_rsqrt14_sd() {
57943        let src = _mm_set_pd(10., 11.);
57944        let a = _mm_set_pd(1., 2.);
57945        let b = _mm_set_pd(3., 4.);
57946        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
57947        let e = _mm_set_pd(1., 11.);
57948        assert_eq_m128d(r, e);
57949        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
57950        let e = _mm_set_pd(1., 0.5);
57951        assert_eq_m128d(r, e);
57952    }
57953
57954    #[simd_test(enable = "avx512f")]
57955    unsafe fn test_mm_maskz_rsqrt14_sd() {
57956        let a = _mm_set_pd(1., 2.);
57957        let b = _mm_set_pd(3., 4.);
57958        let r = _mm_maskz_rsqrt14_sd(0, a, b);
57959        let e = _mm_set_pd(1., 0.);
57960        assert_eq_m128d(r, e);
57961        let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
57962        let e = _mm_set_pd(1., 0.5);
57963        assert_eq_m128d(r, e);
57964    }
57965
57966    #[simd_test(enable = "avx512f")]
57967    unsafe fn test_mm_rcp14_ss() {
57968        let a = _mm_set_ps(1., 2., 10., 20.);
57969        let b = _mm_set_ps(3., 4., 30., 4.);
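        // The low lane approximates 1/4. with a max relative error of 2^-14,
        // which here is exactly 0.25; the upper lanes are copied from `a`.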
57970        let r = _mm_rcp14_ss(a, b);
57971        let e = _mm_set_ps(1., 2., 10., 0.25);
57972        assert_eq_m128(r, e);
57973    }
57974
57975    #[simd_test(enable = "avx512f")]
57976    unsafe fn test_mm_mask_rcp14_ss() {
57977        let src = _mm_set_ps(10., 11., 100., 110.);
57978        let a = _mm_set_ps(1., 2., 10., 20.);
57979        let b = _mm_set_ps(3., 4., 30., 4.);
57980        let r = _mm_mask_rcp14_ss(src, 0, a, b);
57981        let e = _mm_set_ps(1., 2., 10., 110.);
57982        assert_eq_m128(r, e);
57983        let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
57984        let e = _mm_set_ps(1., 2., 10., 0.25);
57985        assert_eq_m128(r, e);
57986    }
57987
57988    #[simd_test(enable = "avx512f")]
57989    unsafe fn test_mm_maskz_rcp14_ss() {
57990        let a = _mm_set_ps(1., 2., 10., 20.);
57991        let b = _mm_set_ps(3., 4., 30., 4.);
57992        let r = _mm_maskz_rcp14_ss(0, a, b);
57993        let e = _mm_set_ps(1., 2., 10., 0.);
57994        assert_eq_m128(r, e);
57995        let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
57996        let e = _mm_set_ps(1., 2., 10., 0.25);
57997        assert_eq_m128(r, e);
57998    }
57999
58000    #[simd_test(enable = "avx512f")]
58001    unsafe fn test_mm_rcp14_sd() {
58002        let a = _mm_set_pd(1., 2.);
58003        let b = _mm_set_pd(3., 4.);
58004        let r = _mm_rcp14_sd(a, b);
58005        let e = _mm_set_pd(1., 0.25);
58006        assert_eq_m128d(r, e);
58007    }
58008
58009    #[simd_test(enable = "avx512f")]
58010    unsafe fn test_mm_mask_rcp14_sd() {
58011        let src = _mm_set_pd(10., 11.);
58012        let a = _mm_set_pd(1., 2.);
58013        let b = _mm_set_pd(3., 4.);
58014        let r = _mm_mask_rcp14_sd(src, 0, a, b);
58015        let e = _mm_set_pd(1., 11.);
58016        assert_eq_m128d(r, e);
58017        let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
58018        let e = _mm_set_pd(1., 0.25);
58019        assert_eq_m128d(r, e);
58020    }
58021
58022    #[simd_test(enable = "avx512f")]
58023    unsafe fn test_mm_maskz_rcp14_sd() {
58024        let a = _mm_set_pd(1., 2.);
58025        let b = _mm_set_pd(3., 4.);
58026        let r = _mm_maskz_rcp14_sd(0, a, b);
58027        let e = _mm_set_pd(1., 0.);
58028        assert_eq_m128d(r, e);
58029        let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
58030        let e = _mm_set_pd(1., 0.25);
58031        assert_eq_m128d(r, e);
58032    }
58033
58034    #[simd_test(enable = "avx512f")]
58035    unsafe fn test_mm_getexp_ss() {
58036        let a = _mm_set1_ps(2.);
58037        let b = _mm_set1_ps(3.);
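        // getexp extracts the unbiased exponent as a float: floor(log2(3.)) = 1.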
58038        let r = _mm_getexp_ss(a, b);
58039        let e = _mm_set_ps(2., 2., 2., 1.);
58040        assert_eq_m128(r, e);
58041    }
58042
58043    #[simd_test(enable = "avx512f")]
58044    unsafe fn test_mm_mask_getexp_ss() {
58045        let a = _mm_set1_ps(2.);
58046        let b = _mm_set1_ps(3.);
58047        let r = _mm_mask_getexp_ss(a, 0, a, b);
58048        let e = _mm_set_ps(2., 2., 2., 2.);
58049        assert_eq_m128(r, e);
58050        let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
58051        let e = _mm_set_ps(2., 2., 2., 1.);
58052        assert_eq_m128(r, e);
58053    }
58054
58055    #[simd_test(enable = "avx512f")]
58056    unsafe fn test_mm_maskz_getexp_ss() {
58057        let a = _mm_set1_ps(2.);
58058        let b = _mm_set1_ps(3.);
58059        let r = _mm_maskz_getexp_ss(0, a, b);
58060        let e = _mm_set_ps(2., 2., 2., 0.);
58061        assert_eq_m128(r, e);
58062        let r = _mm_maskz_getexp_ss(0b11111111, a, b);
58063        let e = _mm_set_ps(2., 2., 2., 1.);
58064        assert_eq_m128(r, e);
58065    }
58066
58067    #[simd_test(enable = "avx512f")]
58068    unsafe fn test_mm_getexp_sd() {
58069        let a = _mm_set1_pd(2.);
58070        let b = _mm_set1_pd(3.);
58071        let r = _mm_getexp_sd(a, b);
58072        let e = _mm_set_pd(2., 1.);
58073        assert_eq_m128d(r, e);
58074    }
58075
58076    #[simd_test(enable = "avx512f")]
58077    unsafe fn test_mm_mask_getexp_sd() {
58078        let a = _mm_set1_pd(2.);
58079        let b = _mm_set1_pd(3.);
58080        let r = _mm_mask_getexp_sd(a, 0, a, b);
58081        let e = _mm_set_pd(2., 2.);
58082        assert_eq_m128d(r, e);
58083        let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
58084        let e = _mm_set_pd(2., 1.);
58085        assert_eq_m128d(r, e);
58086    }
58087
58088    #[simd_test(enable = "avx512f")]
58089    unsafe fn test_mm_maskz_getexp_sd() {
58090        let a = _mm_set1_pd(2.);
58091        let b = _mm_set1_pd(3.);
58092        let r = _mm_maskz_getexp_sd(0, a, b);
58093        let e = _mm_set_pd(2., 0.);
58094        assert_eq_m128d(r, e);
58095        let r = _mm_maskz_getexp_sd(0b11111111, a, b);
58096        let e = _mm_set_pd(2., 1.);
58097        assert_eq_m128d(r, e);
58098    }
58099
58100    #[simd_test(enable = "avx512f")]
58101    unsafe fn test_mm_getmant_ss() {
58102        let a = _mm_set1_ps(20.);
58103        let b = _mm_set1_ps(10.);
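        // With _MM_MANT_NORM_1_2 the mantissa is normalized to [1, 2): 10. = 1.25 * 2^3.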
58104        let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58105        let e = _mm_set_ps(20., 20., 20., 1.25);
58106        assert_eq_m128(r, e);
58107    }
58108
58109    #[simd_test(enable = "avx512f")]
58110    unsafe fn test_mm_mask_getmant_ss() {
58111        let a = _mm_set1_ps(20.);
58112        let b = _mm_set1_ps(10.);
58113        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58114        let e = _mm_set_ps(20., 20., 20., 20.);
58115        assert_eq_m128(r, e);
58116        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58117        let e = _mm_set_ps(20., 20., 20., 1.25);
58118        assert_eq_m128(r, e);
58119    }
58120
58121    #[simd_test(enable = "avx512f")]
58122    unsafe fn test_mm_maskz_getmant_ss() {
58123        let a = _mm_set1_ps(20.);
58124        let b = _mm_set1_ps(10.);
58125        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58126        let e = _mm_set_ps(20., 20., 20., 0.);
58127        assert_eq_m128(r, e);
58128        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58129        let e = _mm_set_ps(20., 20., 20., 1.25);
58130        assert_eq_m128(r, e);
58131    }
58132
58133    #[simd_test(enable = "avx512f")]
58134    unsafe fn test_mm_getmant_sd() {
58135        let a = _mm_set1_pd(20.);
58136        let b = _mm_set1_pd(10.);
58137        let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58138        let e = _mm_set_pd(20., 1.25);
58139        assert_eq_m128d(r, e);
58140    }
58141
58142    #[simd_test(enable = "avx512f")]
58143    unsafe fn test_mm_mask_getmant_sd() {
58144        let a = _mm_set1_pd(20.);
58145        let b = _mm_set1_pd(10.);
58146        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58147        let e = _mm_set_pd(20., 20.);
58148        assert_eq_m128d(r, e);
58149        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58150        let e = _mm_set_pd(20., 1.25);
58151        assert_eq_m128d(r, e);
58152    }
58153
58154    #[simd_test(enable = "avx512f")]
58155    unsafe fn test_mm_maskz_getmant_sd() {
58156        let a = _mm_set1_pd(20.);
58157        let b = _mm_set1_pd(10.);
58158        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58159        let e = _mm_set_pd(20., 0.);
58160        assert_eq_m128d(r, e);
58161        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58162        let e = _mm_set_pd(20., 1.25);
58163        assert_eq_m128d(r, e);
58164    }
58165
58166    #[simd_test(enable = "avx512f")]
58167    unsafe fn test_mm_roundscale_ss() {
58168        let a = _mm_set1_ps(2.2);
58169        let b = _mm_set1_ps(1.1);
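        // IMM8 = 0 keeps zero fraction bits, i.e. rounds to the nearest integer: 1.1 -> 1.0.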
58170        let r = _mm_roundscale_ss::<0>(a, b);
58171        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58172        assert_eq_m128(r, e);
58173    }
58174
58175    #[simd_test(enable = "avx512f")]
58176    unsafe fn test_mm_mask_roundscale_ss() {
58177        let a = _mm_set1_ps(2.2);
58178        let b = _mm_set1_ps(1.1);
58179        let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
58180        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
58181        assert_eq_m128(r, e);
58182        let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
58183        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58184        assert_eq_m128(r, e);
58185    }
58186
58187    #[simd_test(enable = "avx512f")]
58188    unsafe fn test_mm_maskz_roundscale_ss() {
58189        let a = _mm_set1_ps(2.2);
58190        let b = _mm_set1_ps(1.1);
58191        let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
58192        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
58193        assert_eq_m128(r, e);
58194        let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
58195        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58196        assert_eq_m128(r, e);
58197    }
58198
58199    #[simd_test(enable = "avx512f")]
58200    unsafe fn test_mm_roundscale_sd() {
58201        let a = _mm_set1_pd(2.2);
58202        let b = _mm_set1_pd(1.1);
58203        let r = _mm_roundscale_sd::<0>(a, b);
58204        let e = _mm_set_pd(2.2, 1.0);
58205        assert_eq_m128d(r, e);
58206    }
58207
58208    #[simd_test(enable = "avx512f")]
58209    unsafe fn test_mm_mask_roundscale_sd() {
58210        let a = _mm_set1_pd(2.2);
58211        let b = _mm_set1_pd(1.1);
58212        let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
58213        let e = _mm_set_pd(2.2, 2.2);
58214        assert_eq_m128d(r, e);
58215        let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
58216        let e = _mm_set_pd(2.2, 1.0);
58217        assert_eq_m128d(r, e);
58218    }
58219
58220    #[simd_test(enable = "avx512f")]
58221    unsafe fn test_mm_maskz_roundscale_sd() {
58222        let a = _mm_set1_pd(2.2);
58223        let b = _mm_set1_pd(1.1);
58224        let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
58225        let e = _mm_set_pd(2.2, 0.0);
58226        assert_eq_m128d(r, e);
58227        let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
58228        let e = _mm_set_pd(2.2, 1.0);
58229        assert_eq_m128d(r, e);
58230    }
58231
58232    #[simd_test(enable = "avx512f")]
58233    unsafe fn test_mm_scalef_ss() {
58234        let a = _mm_set1_ps(1.);
58235        let b = _mm_set1_ps(3.);
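        // scalef computes a * 2^floor(b) in the low lane: 1. * 2^3 = 8.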
58236        let r = _mm_scalef_ss(a, b);
58237        let e = _mm_set_ps(1., 1., 1., 8.);
58238        assert_eq_m128(r, e);
58239    }
58240
58241    #[simd_test(enable = "avx512f")]
58242    unsafe fn test_mm_mask_scalef_ss() {
58243        let a = _mm_set1_ps(1.);
58244        let b = _mm_set1_ps(3.);
58245        let r = _mm_mask_scalef_ss(a, 0, a, b);
58246        let e = _mm_set_ps(1., 1., 1., 1.);
58247        assert_eq_m128(r, e);
58248        let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
58249        let e = _mm_set_ps(1., 1., 1., 8.);
58250        assert_eq_m128(r, e);
58251    }
58252
58253    #[simd_test(enable = "avx512f")]
58254    unsafe fn test_mm_maskz_scalef_ss() {
58255        let a = _mm_set1_ps(1.);
58256        let b = _mm_set1_ps(3.);
58257        let r = _mm_maskz_scalef_ss(0, a, b);
58258        let e = _mm_set_ps(1., 1., 1., 0.);
58259        assert_eq_m128(r, e);
58260        let r = _mm_maskz_scalef_ss(0b11111111, a, b);
58261        let e = _mm_set_ps(1., 1., 1., 8.);
58262        assert_eq_m128(r, e);
58263    }
58264
58265    #[simd_test(enable = "avx512f")]
58266    unsafe fn test_mm_scalef_sd() {
58267        let a = _mm_set1_pd(1.);
58268        let b = _mm_set1_pd(3.);
58269        let r = _mm_scalef_sd(a, b);
58270        let e = _mm_set_pd(1., 8.);
58271        assert_eq_m128d(r, e);
58272    }
58273
58274    #[simd_test(enable = "avx512f")]
58275    unsafe fn test_mm_mask_scalef_sd() {
58276        let a = _mm_set1_pd(1.);
58277        let b = _mm_set1_pd(3.);
58278        let r = _mm_mask_scalef_sd(a, 0, a, b);
58279        let e = _mm_set_pd(1., 1.);
58280        assert_eq_m128d(r, e);
58281        let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
58282        let e = _mm_set_pd(1., 8.);
58283        assert_eq_m128d(r, e);
58284    }
58285
58286    #[simd_test(enable = "avx512f")]
58287    unsafe fn test_mm_maskz_scalef_sd() {
58288        let a = _mm_set1_pd(1.);
58289        let b = _mm_set1_pd(3.);
58290        let r = _mm_maskz_scalef_sd(0, a, b);
58291        let e = _mm_set_pd(1., 0.);
58292        assert_eq_m128d(r, e);
58293        let r = _mm_maskz_scalef_sd(0b11111111, a, b);
58294        let e = _mm_set_pd(1., 8.);
58295        assert_eq_m128d(r, e);
58296    }
58297
58298    #[simd_test(enable = "avx512f")]
58299    unsafe fn test_mm_mask_fmadd_ss() {
58300        let a = _mm_set1_ps(1.);
58301        let b = _mm_set1_ps(2.);
58302        let c = _mm_set1_ps(3.);
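        // When the mask bit is set the low lane is a * b + c = 1. * 2. + 3. = 5.,
        // otherwise it is copied from `a`; the upper lanes always come from `a`.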
58303        let r = _mm_mask_fmadd_ss(a, 0, b, c);
58304        assert_eq_m128(r, a);
58305        let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
58306        let e = _mm_set_ps(1., 1., 1., 5.);
58307        assert_eq_m128(r, e);
58308    }
58309
58310    #[simd_test(enable = "avx512f")]
58311    unsafe fn test_mm_maskz_fmadd_ss() {
58312        let a = _mm_set1_ps(1.);
58313        let b = _mm_set1_ps(2.);
58314        let c = _mm_set1_ps(3.);
58315        let r = _mm_maskz_fmadd_ss(0, a, b, c);
58316        let e = _mm_set_ps(1., 1., 1., 0.);
58317        assert_eq_m128(r, e);
58318        let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
58319        let e = _mm_set_ps(1., 1., 1., 5.);
58320        assert_eq_m128(r, e);
58321    }
58322
58323    #[simd_test(enable = "avx512f")]
58324    unsafe fn test_mm_mask3_fmadd_ss() {
58325        let a = _mm_set1_ps(1.);
58326        let b = _mm_set1_ps(2.);
58327        let c = _mm_set1_ps(3.);
58328        let r = _mm_mask3_fmadd_ss(a, b, c, 0);
58329        assert_eq_m128(r, c);
58330        let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
58331        let e = _mm_set_ps(3., 3., 3., 5.);
58332        assert_eq_m128(r, e);
58333    }
58334
58335    #[simd_test(enable = "avx512f")]
58336    unsafe fn test_mm_mask_fmadd_sd() {
58337        let a = _mm_set1_pd(1.);
58338        let b = _mm_set1_pd(2.);
58339        let c = _mm_set1_pd(3.);
58340        let r = _mm_mask_fmadd_sd(a, 0, b, c);
58341        assert_eq_m128d(r, a);
58342        let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
58343        let e = _mm_set_pd(1., 5.);
58344        assert_eq_m128d(r, e);
58345    }
58346
58347    #[simd_test(enable = "avx512f")]
58348    unsafe fn test_mm_maskz_fmadd_sd() {
58349        let a = _mm_set1_pd(1.);
58350        let b = _mm_set1_pd(2.);
58351        let c = _mm_set1_pd(3.);
58352        let r = _mm_maskz_fmadd_sd(0, a, b, c);
58353        let e = _mm_set_pd(1., 0.);
58354        assert_eq_m128d(r, e);
58355        let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
58356        let e = _mm_set_pd(1., 5.);
58357        assert_eq_m128d(r, e);
58358    }
58359
58360    #[simd_test(enable = "avx512f")]
58361    unsafe fn test_mm_mask3_fmadd_sd() {
58362        let a = _mm_set1_pd(1.);
58363        let b = _mm_set1_pd(2.);
58364        let c = _mm_set1_pd(3.);
58365        let r = _mm_mask3_fmadd_sd(a, b, c, 0);
58366        assert_eq_m128d(r, c);
58367        let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
58368        let e = _mm_set_pd(3., 5.);
58369        assert_eq_m128d(r, e);
58370    }
58371
58372    #[simd_test(enable = "avx512f")]
58373    unsafe fn test_mm_mask_fmsub_ss() {
58374        let a = _mm_set1_ps(1.);
58375        let b = _mm_set1_ps(2.);
58376        let c = _mm_set1_ps(3.);
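        // When the mask bit is set the low lane is a * b - c = 1. * 2. - 3. = -1.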
58377        let r = _mm_mask_fmsub_ss(a, 0, b, c);
58378        assert_eq_m128(r, a);
58379        let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
58380        let e = _mm_set_ps(1., 1., 1., -1.);
58381        assert_eq_m128(r, e);
58382    }
58383
58384    #[simd_test(enable = "avx512f")]
58385    unsafe fn test_mm_maskz_fmsub_ss() {
58386        let a = _mm_set1_ps(1.);
58387        let b = _mm_set1_ps(2.);
58388        let c = _mm_set1_ps(3.);
58389        let r = _mm_maskz_fmsub_ss(0, a, b, c);
58390        let e = _mm_set_ps(1., 1., 1., 0.);
58391        assert_eq_m128(r, e);
58392        let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
58393        let e = _mm_set_ps(1., 1., 1., -1.);
58394        assert_eq_m128(r, e);
58395    }
58396
58397    #[simd_test(enable = "avx512f")]
58398    unsafe fn test_mm_mask3_fmsub_ss() {
58399        let a = _mm_set1_ps(1.);
58400        let b = _mm_set1_ps(2.);
58401        let c = _mm_set1_ps(3.);
58402        let r = _mm_mask3_fmsub_ss(a, b, c, 0);
58403        assert_eq_m128(r, c);
58404        let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
58405        let e = _mm_set_ps(3., 3., 3., -1.);
58406        assert_eq_m128(r, e);
58407    }
58408
58409    #[simd_test(enable = "avx512f")]
58410    unsafe fn test_mm_mask_fmsub_sd() {
58411        let a = _mm_set1_pd(1.);
58412        let b = _mm_set1_pd(2.);
58413        let c = _mm_set1_pd(3.);
58414        let r = _mm_mask_fmsub_sd(a, 0, b, c);
58415        assert_eq_m128d(r, a);
58416        let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
58417        let e = _mm_set_pd(1., -1.);
58418        assert_eq_m128d(r, e);
58419    }
58420
58421    #[simd_test(enable = "avx512f")]
58422    unsafe fn test_mm_maskz_fmsub_sd() {
58423        let a = _mm_set1_pd(1.);
58424        let b = _mm_set1_pd(2.);
58425        let c = _mm_set1_pd(3.);
58426        let r = _mm_maskz_fmsub_sd(0, a, b, c);
58427        let e = _mm_set_pd(1., 0.);
58428        assert_eq_m128d(r, e);
58429        let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
58430        let e = _mm_set_pd(1., -1.);
58431        assert_eq_m128d(r, e);
58432    }
58433
58434    #[simd_test(enable = "avx512f")]
58435    unsafe fn test_mm_mask3_fmsub_sd() {
58436        let a = _mm_set1_pd(1.);
58437        let b = _mm_set1_pd(2.);
58438        let c = _mm_set1_pd(3.);
58439        let r = _mm_mask3_fmsub_sd(a, b, c, 0);
58440        assert_eq_m128d(r, c);
58441        let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
58442        let e = _mm_set_pd(3., -1.);
58443        assert_eq_m128d(r, e);
58444    }
58445
58446    #[simd_test(enable = "avx512f")]
58447    unsafe fn test_mm_mask_fnmadd_ss() {
58448        let a = _mm_set1_ps(1.);
58449        let b = _mm_set1_ps(2.);
58450        let c = _mm_set1_ps(3.);
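        // When the mask bit is set the low lane is -(a * b) + c = -2. + 3. = 1.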
58451        let r = _mm_mask_fnmadd_ss(a, 0, b, c);
58452        assert_eq_m128(r, a);
58453        let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
58454        let e = _mm_set_ps(1., 1., 1., 1.);
58455        assert_eq_m128(r, e);
58456    }
58457
58458    #[simd_test(enable = "avx512f")]
58459    unsafe fn test_mm_maskz_fnmadd_ss() {
58460        let a = _mm_set1_ps(1.);
58461        let b = _mm_set1_ps(2.);
58462        let c = _mm_set1_ps(3.);
58463        let r = _mm_maskz_fnmadd_ss(0, a, b, c);
58464        let e = _mm_set_ps(1., 1., 1., 0.);
58465        assert_eq_m128(r, e);
58466        let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
58467        let e = _mm_set_ps(1., 1., 1., 1.);
58468        assert_eq_m128(r, e);
58469    }
58470
58471    #[simd_test(enable = "avx512f")]
58472    unsafe fn test_mm_mask3_fnmadd_ss() {
58473        let a = _mm_set1_ps(1.);
58474        let b = _mm_set1_ps(2.);
58475        let c = _mm_set1_ps(3.);
58476        let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
58477        assert_eq_m128(r, c);
58478        let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
58479        let e = _mm_set_ps(3., 3., 3., 1.);
58480        assert_eq_m128(r, e);
58481    }
58482
58483    #[simd_test(enable = "avx512f")]
58484    unsafe fn test_mm_mask_fnmadd_sd() {
58485        let a = _mm_set1_pd(1.);
58486        let b = _mm_set1_pd(2.);
58487        let c = _mm_set1_pd(3.);
58488        let r = _mm_mask_fnmadd_sd(a, 0, b, c);
58489        assert_eq_m128d(r, a);
58490        let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
58491        let e = _mm_set_pd(1., 1.);
58492        assert_eq_m128d(r, e);
58493    }
58494
58495    #[simd_test(enable = "avx512f")]
58496    unsafe fn test_mm_maskz_fnmadd_sd() {
58497        let a = _mm_set1_pd(1.);
58498        let b = _mm_set1_pd(2.);
58499        let c = _mm_set1_pd(3.);
58500        let r = _mm_maskz_fnmadd_sd(0, a, b, c);
58501        let e = _mm_set_pd(1., 0.);
58502        assert_eq_m128d(r, e);
58503        let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
58504        let e = _mm_set_pd(1., 1.);
58505        assert_eq_m128d(r, e);
58506    }
58507
58508    #[simd_test(enable = "avx512f")]
58509    unsafe fn test_mm_mask3_fnmadd_sd() {
58510        let a = _mm_set1_pd(1.);
58511        let b = _mm_set1_pd(2.);
58512        let c = _mm_set1_pd(3.);
58513        let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
58514        assert_eq_m128d(r, c);
58515        let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
58516        let e = _mm_set_pd(3., 1.);
58517        assert_eq_m128d(r, e);
58518    }
58519
58520    #[simd_test(enable = "avx512f")]
58521    unsafe fn test_mm_mask_fnmsub_ss() {
58522        let a = _mm_set1_ps(1.);
58523        let b = _mm_set1_ps(2.);
58524        let c = _mm_set1_ps(3.);
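        // When the mask bit is set the low lane is -(a * b) - c = -2. - 3. = -5.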
58525        let r = _mm_mask_fnmsub_ss(a, 0, b, c);
58526        assert_eq_m128(r, a);
58527        let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
58528        let e = _mm_set_ps(1., 1., 1., -5.);
58529        assert_eq_m128(r, e);
58530    }
58531
58532    #[simd_test(enable = "avx512f")]
58533    unsafe fn test_mm_maskz_fnmsub_ss() {
58534        let a = _mm_set1_ps(1.);
58535        let b = _mm_set1_ps(2.);
58536        let c = _mm_set1_ps(3.);
58537        let r = _mm_maskz_fnmsub_ss(0, a, b, c);
58538        let e = _mm_set_ps(1., 1., 1., 0.);
58539        assert_eq_m128(r, e);
58540        let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
58541        let e = _mm_set_ps(1., 1., 1., -5.);
58542        assert_eq_m128(r, e);
58543    }
58544
58545    #[simd_test(enable = "avx512f")]
58546    unsafe fn test_mm_mask3_fnmsub_ss() {
58547        let a = _mm_set1_ps(1.);
58548        let b = _mm_set1_ps(2.);
58549        let c = _mm_set1_ps(3.);
58550        let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
58551        assert_eq_m128(r, c);
58552        let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
58553        let e = _mm_set_ps(3., 3., 3., -5.);
58554        assert_eq_m128(r, e);
58555    }
58556
58557    #[simd_test(enable = "avx512f")]
58558    unsafe fn test_mm_mask_fnmsub_sd() {
58559        let a = _mm_set1_pd(1.);
58560        let b = _mm_set1_pd(2.);
58561        let c = _mm_set1_pd(3.);
58562        let r = _mm_mask_fnmsub_sd(a, 0, b, c);
58563        assert_eq_m128d(r, a);
58564        let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
58565        let e = _mm_set_pd(1., -5.);
58566        assert_eq_m128d(r, e);
58567    }
58568
58569    #[simd_test(enable = "avx512f")]
58570    unsafe fn test_mm_maskz_fnmsub_sd() {
58571        let a = _mm_set1_pd(1.);
58572        let b = _mm_set1_pd(2.);
58573        let c = _mm_set1_pd(3.);
58574        let r = _mm_maskz_fnmsub_sd(0, a, b, c);
58575        let e = _mm_set_pd(1., 0.);
58576        assert_eq_m128d(r, e);
58577        let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
58578        let e = _mm_set_pd(1., -5.);
58579        assert_eq_m128d(r, e);
58580    }
58581
58582    #[simd_test(enable = "avx512f")]
58583    unsafe fn test_mm_mask3_fnmsub_sd() {
58584        let a = _mm_set1_pd(1.);
58585        let b = _mm_set1_pd(2.);
58586        let c = _mm_set1_pd(3.);
58587        let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
58588        assert_eq_m128d(r, c);
58589        let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
58590        let e = _mm_set_pd(3., -5.);
58591        assert_eq_m128d(r, e);
58592    }
58593
58594    #[simd_test(enable = "avx512f")]
58595    unsafe fn test_mm_add_round_ss() {
58596        let a = _mm_set_ps(1., 2., 10., 20.);
58597        let b = _mm_set_ps(3., 4., 30., 40.);
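        // _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC selects static round-toward-zero with
        // exceptions suppressed; 20. + 40. = 60. is exact, so the mode does not affect the result.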
58598        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58599        let e = _mm_set_ps(1., 2., 10., 60.);
58600        assert_eq_m128(r, e);
58601    }
58602
58603    #[simd_test(enable = "avx512f")]
58604    unsafe fn test_mm_mask_add_round_ss() {
58605        let src = _mm_set_ps(10., 11., 100., 110.);
58606        let a = _mm_set_ps(1., 2., 10., 20.);
58607        let b = _mm_set_ps(3., 4., 30., 40.);
58608        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58609        let e = _mm_set_ps(1., 2., 10., 110.);
58610        assert_eq_m128(r, e);
58611        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58612            src, 0b11111111, a, b,
58613        );
58614        let e = _mm_set_ps(1., 2., 10., 60.);
58615        assert_eq_m128(r, e);
58616    }
58617
58618    #[simd_test(enable = "avx512f")]
58619    unsafe fn test_mm_maskz_add_round_ss() {
58620        let a = _mm_set_ps(1., 2., 10., 20.);
58621        let b = _mm_set_ps(3., 4., 30., 40.);
58622        let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58623        let e = _mm_set_ps(1., 2., 10., 0.);
58624        assert_eq_m128(r, e);
58625        let r =
58626            _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58627        let e = _mm_set_ps(1., 2., 10., 60.);
58628        assert_eq_m128(r, e);
58629    }
58630
58631    #[simd_test(enable = "avx512f")]
58632    unsafe fn test_mm_add_round_sd() {
58633        let a = _mm_set_pd(1., 2.);
58634        let b = _mm_set_pd(3., 4.);
58635        let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58636        let e = _mm_set_pd(1., 6.);
58637        assert_eq_m128d(r, e);
58638    }
58639
58640    #[simd_test(enable = "avx512f")]
58641    unsafe fn test_mm_mask_add_round_sd() {
58642        let src = _mm_set_pd(10., 11.);
58643        let a = _mm_set_pd(1., 2.);
58644        let b = _mm_set_pd(3., 4.);
58645        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58646        let e = _mm_set_pd(1., 11.);
58647        assert_eq_m128d(r, e);
58648        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58649            src, 0b11111111, a, b,
58650        );
58651        let e = _mm_set_pd(1., 6.);
58652        assert_eq_m128d(r, e);
58653    }
58654
58655    #[simd_test(enable = "avx512f")]
58656    unsafe fn test_mm_maskz_add_round_sd() {
58657        let a = _mm_set_pd(1., 2.);
58658        let b = _mm_set_pd(3., 4.);
58659        let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58660        let e = _mm_set_pd(1., 0.);
58661        assert_eq_m128d(r, e);
58662        let r =
58663            _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58664        let e = _mm_set_pd(1., 6.);
58665        assert_eq_m128d(r, e);
58666    }
58667
58668    #[simd_test(enable = "avx512f")]
58669    unsafe fn test_mm_sub_round_ss() {
58670        let a = _mm_set_ps(1., 2., 10., 20.);
58671        let b = _mm_set_ps(3., 4., 30., 40.);
58672        let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58673        let e = _mm_set_ps(1., 2., 10., -20.);
58674        assert_eq_m128(r, e);
58675    }
58676
58677    #[simd_test(enable = "avx512f")]
58678    unsafe fn test_mm_mask_sub_round_ss() {
58679        let src = _mm_set_ps(10., 11., 100., 110.);
58680        let a = _mm_set_ps(1., 2., 10., 20.);
58681        let b = _mm_set_ps(3., 4., 30., 40.);
58682        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58683        let e = _mm_set_ps(1., 2., 10., 110.);
58684        assert_eq_m128(r, e);
58685        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58686            src, 0b11111111, a, b,
58687        );
58688        let e = _mm_set_ps(1., 2., 10., -20.);
58689        assert_eq_m128(r, e);
58690    }
58691
58692    #[simd_test(enable = "avx512f")]
58693    unsafe fn test_mm_maskz_sub_round_ss() {
58694        let a = _mm_set_ps(1., 2., 10., 20.);
58695        let b = _mm_set_ps(3., 4., 30., 40.);
58696        let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58697        let e = _mm_set_ps(1., 2., 10., 0.);
58698        assert_eq_m128(r, e);
58699        let r =
58700            _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58701        let e = _mm_set_ps(1., 2., 10., -20.);
58702        assert_eq_m128(r, e);
58703    }
58704
58705    #[simd_test(enable = "avx512f")]
58706    unsafe fn test_mm_sub_round_sd() {
58707        let a = _mm_set_pd(1., 2.);
58708        let b = _mm_set_pd(3., 4.);
58709        let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58710        let e = _mm_set_pd(1., -2.);
58711        assert_eq_m128d(r, e);
58712    }
58713
58714    #[simd_test(enable = "avx512f")]
58715    unsafe fn test_mm_mask_sub_round_sd() {
58716        let src = _mm_set_pd(10., 11.);
58717        let a = _mm_set_pd(1., 2.);
58718        let b = _mm_set_pd(3., 4.);
58719        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58720        let e = _mm_set_pd(1., 11.);
58721        assert_eq_m128d(r, e);
58722        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58723            src, 0b11111111, a, b,
58724        );
58725        let e = _mm_set_pd(1., -2.);
58726        assert_eq_m128d(r, e);
58727    }
58728
58729    #[simd_test(enable = "avx512f")]
58730    unsafe fn test_mm_maskz_sub_round_sd() {
58731        let a = _mm_set_pd(1., 2.);
58732        let b = _mm_set_pd(3., 4.);
58733        let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58734        let e = _mm_set_pd(1., 0.);
58735        assert_eq_m128d(r, e);
58736        let r =
58737            _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58738        let e = _mm_set_pd(1., -2.);
58739        assert_eq_m128d(r, e);
58740    }
58741
58742    #[simd_test(enable = "avx512f")]
58743    unsafe fn test_mm_mul_round_ss() {
58744        let a = _mm_set_ps(1., 2., 10., 20.);
58745        let b = _mm_set_ps(3., 4., 30., 40.);
58746        let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58747        let e = _mm_set_ps(1., 2., 10., 800.);
58748        assert_eq_m128(r, e);
58749    }
58750
58751    #[simd_test(enable = "avx512f")]
58752    unsafe fn test_mm_mask_mul_round_ss() {
58753        let src = _mm_set_ps(10., 11., 100., 110.);
58754        let a = _mm_set_ps(1., 2., 10., 20.);
58755        let b = _mm_set_ps(3., 4., 30., 40.);
58756        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58757        let e = _mm_set_ps(1., 2., 10., 110.);
58758        assert_eq_m128(r, e);
58759        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58760            src, 0b11111111, a, b,
58761        );
58762        let e = _mm_set_ps(1., 2., 10., 800.);
58763        assert_eq_m128(r, e);
58764    }
58765
58766    #[simd_test(enable = "avx512f")]
58767    unsafe fn test_mm_maskz_mul_round_ss() {
58768        let a = _mm_set_ps(1., 2., 10., 20.);
58769        let b = _mm_set_ps(3., 4., 30., 40.);
58770        let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58771        let e = _mm_set_ps(1., 2., 10., 0.);
58772        assert_eq_m128(r, e);
58773        let r =
58774            _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58775        let e = _mm_set_ps(1., 2., 10., 800.);
58776        assert_eq_m128(r, e);
58777    }
58778
58779    #[simd_test(enable = "avx512f")]
58780    unsafe fn test_mm_mul_round_sd() {
58781        let a = _mm_set_pd(1., 2.);
58782        let b = _mm_set_pd(3., 4.);
58783        let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58784        let e = _mm_set_pd(1., 8.);
58785        assert_eq_m128d(r, e);
58786    }
58787
58788    #[simd_test(enable = "avx512f")]
58789    unsafe fn test_mm_mask_mul_round_sd() {
58790        let src = _mm_set_pd(10., 11.);
58791        let a = _mm_set_pd(1., 2.);
58792        let b = _mm_set_pd(3., 4.);
58793        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58794        let e = _mm_set_pd(1., 11.);
58795        assert_eq_m128d(r, e);
58796        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58797            src, 0b11111111, a, b,
58798        );
58799        let e = _mm_set_pd(1., 8.);
58800        assert_eq_m128d(r, e);
58801    }
58802
58803    #[simd_test(enable = "avx512f")]
58804    unsafe fn test_mm_maskz_mul_round_sd() {
58805        let a = _mm_set_pd(1., 2.);
58806        let b = _mm_set_pd(3., 4.);
58807        let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58808        let e = _mm_set_pd(1., 0.);
58809        assert_eq_m128d(r, e);
58810        let r =
58811            _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58812        let e = _mm_set_pd(1., 8.);
58813        assert_eq_m128d(r, e);
58814    }
58815
58816    #[simd_test(enable = "avx512f")]
58817    unsafe fn test_mm_div_round_ss() {
58818        let a = _mm_set_ps(1., 2., 10., 20.);
58819        let b = _mm_set_ps(3., 4., 30., 40.);
58820        let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58821        let e = _mm_set_ps(1., 2., 10., 0.5);
58822        assert_eq_m128(r, e);
58823    }
58824
58825    #[simd_test(enable = "avx512f")]
58826    unsafe fn test_mm_mask_div_round_ss() {
58827        let src = _mm_set_ps(10., 11., 100., 110.);
58828        let a = _mm_set_ps(1., 2., 10., 20.);
58829        let b = _mm_set_ps(3., 4., 30., 40.);
58830        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58831        let e = _mm_set_ps(1., 2., 10., 110.);
58832        assert_eq_m128(r, e);
58833        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58834            src, 0b11111111, a, b,
58835        );
58836        let e = _mm_set_ps(1., 2., 10., 0.5);
58837        assert_eq_m128(r, e);
58838    }
58839
58840    #[simd_test(enable = "avx512f")]
58841    unsafe fn test_mm_maskz_div_round_ss() {
58842        let a = _mm_set_ps(1., 2., 10., 20.);
58843        let b = _mm_set_ps(3., 4., 30., 40.);
58844        let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58845        let e = _mm_set_ps(1., 2., 10., 0.);
58846        assert_eq_m128(r, e);
58847        let r =
58848            _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58849        let e = _mm_set_ps(1., 2., 10., 0.5);
58850        assert_eq_m128(r, e);
58851    }
58852
58853    #[simd_test(enable = "avx512f")]
58854    unsafe fn test_mm_div_round_sd() {
58855        let a = _mm_set_pd(1., 2.);
58856        let b = _mm_set_pd(3., 4.);
58857        let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58858        let e = _mm_set_pd(1., 0.5);
58859        assert_eq_m128d(r, e);
58860    }
58861
58862    #[simd_test(enable = "avx512f")]
58863    unsafe fn test_mm_mask_div_round_sd() {
58864        let src = _mm_set_pd(10., 11.);
58865        let a = _mm_set_pd(1., 2.);
58866        let b = _mm_set_pd(3., 4.);
58867        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58868        let e = _mm_set_pd(1., 11.);
58869        assert_eq_m128d(r, e);
58870        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58871            src, 0b11111111, a, b,
58872        );
58873        let e = _mm_set_pd(1., 0.5);
58874        assert_eq_m128d(r, e);
58875    }
58876
58877    #[simd_test(enable = "avx512f")]
58878    unsafe fn test_mm_maskz_div_round_sd() {
58879        let a = _mm_set_pd(1., 2.);
58880        let b = _mm_set_pd(3., 4.);
58881        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58882        let e = _mm_set_pd(1., 0.);
58883        assert_eq_m128d(r, e);
58884        let r =
58885            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58886        let e = _mm_set_pd(1., 0.5);
58887        assert_eq_m128d(r, e);
58888    }
58889
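    // max/min perform no rounding, so the *_round_* max/min intrinsics only accept
    // `_MM_FROUND_CUR_DIRECTION` or SAE (`_MM_FROUND_NO_EXC`) as the rounding argument.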
58890    #[simd_test(enable = "avx512f")]
58891    unsafe fn test_mm_max_round_ss() {
58892        let a = _mm_set_ps(0., 1., 2., 3.);
58893        let b = _mm_set_ps(4., 5., 6., 7.);
58894        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58895        let e = _mm_set_ps(0., 1., 2., 7.);
58896        assert_eq_m128(r, e);
58897    }
58898
58899    #[simd_test(enable = "avx512f")]
58900    unsafe fn test_mm_mask_max_round_ss() {
58901        let a = _mm_set_ps(0., 1., 2., 3.);
58902        let b = _mm_set_ps(4., 5., 6., 7.);
58903        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58904        let e = _mm_set_ps(0., 1., 2., 3.);
58905        assert_eq_m128(r, e);
58906        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58907        let e = _mm_set_ps(0., 1., 2., 7.);
58908        assert_eq_m128(r, e);
58909    }
58910
58911    #[simd_test(enable = "avx512f")]
58912    unsafe fn test_mm_maskz_max_round_ss() {
58913        let a = _mm_set_ps(0., 1., 2., 3.);
58914        let b = _mm_set_ps(4., 5., 6., 7.);
58915        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58916        let e = _mm_set_ps(0., 1., 2., 0.);
58917        assert_eq_m128(r, e);
58918        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58919        let e = _mm_set_ps(0., 1., 2., 7.);
58920        assert_eq_m128(r, e);
58921    }
58922
58923    #[simd_test(enable = "avx512f")]
58924    unsafe fn test_mm_max_round_sd() {
58925        let a = _mm_set_pd(0., 1.);
58926        let b = _mm_set_pd(2., 3.);
58927        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58928        let e = _mm_set_pd(0., 3.);
58929        assert_eq_m128d(r, e);
58930    }
58931
58932    #[simd_test(enable = "avx512f")]
58933    unsafe fn test_mm_mask_max_round_sd() {
58934        let a = _mm_set_pd(0., 1.);
58935        let b = _mm_set_pd(2., 3.);
58936        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58937        let e = _mm_set_pd(0., 1.);
58938        assert_eq_m128d(r, e);
58939        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58940        let e = _mm_set_pd(0., 3.);
58941        assert_eq_m128d(r, e);
58942    }
58943
58944    #[simd_test(enable = "avx512f")]
58945    unsafe fn test_mm_maskz_max_round_sd() {
58946        let a = _mm_set_pd(0., 1.);
58947        let b = _mm_set_pd(2., 3.);
58948        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58949        let e = _mm_set_pd(0., 0.);
58950        assert_eq_m128d(r, e);
58951        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58952        let e = _mm_set_pd(0., 3.);
58953        assert_eq_m128d(r, e);
58954    }
58955
58956    #[simd_test(enable = "avx512f")]
58957    unsafe fn test_mm_min_round_ss() {
58958        let a = _mm_set_ps(0., 1., 2., 3.);
58959        let b = _mm_set_ps(4., 5., 6., 7.);
58960        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58961        let e = _mm_set_ps(0., 1., 2., 3.);
58962        assert_eq_m128(r, e);
58963    }
58964
58965    #[simd_test(enable = "avx512f")]
58966    unsafe fn test_mm_mask_min_round_ss() {
58967        let a = _mm_set_ps(0., 1., 2., 3.);
58968        let b = _mm_set_ps(4., 5., 6., 7.);
58969        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58970        let e = _mm_set_ps(0., 1., 2., 3.);
58971        assert_eq_m128(r, e);
58972        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58973        let e = _mm_set_ps(0., 1., 2., 3.);
58974        assert_eq_m128(r, e);
58975    }
58976
58977    #[simd_test(enable = "avx512f")]
58978    unsafe fn test_mm_maskz_min_round_ss() {
58979        let a = _mm_set_ps(0., 1., 2., 3.);
58980        let b = _mm_set_ps(4., 5., 6., 7.);
58981        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58982        let e = _mm_set_ps(0., 1., 2., 0.);
58983        assert_eq_m128(r, e);
58984        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58985        let e = _mm_set_ps(0., 1., 2., 3.);
58986        assert_eq_m128(r, e);
58987    }
58988
58989    #[simd_test(enable = "avx512f")]
58990    unsafe fn test_mm_min_round_sd() {
58991        let a = _mm_set_pd(0., 1.);
58992        let b = _mm_set_pd(2., 3.);
58993        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58994        let e = _mm_set_pd(0., 1.);
58995        assert_eq_m128d(r, e);
58996    }
58997
58998    #[simd_test(enable = "avx512f")]
58999    unsafe fn test_mm_mask_min_round_sd() {
59000        let a = _mm_set_pd(0., 1.);
59001        let b = _mm_set_pd(2., 3.);
59002        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59003        let e = _mm_set_pd(0., 1.);
59004        assert_eq_m128d(r, e);
59005        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59006        let e = _mm_set_pd(0., 1.);
59007        assert_eq_m128d(r, e);
59008    }
59009
59010    #[simd_test(enable = "avx512f")]
59011    unsafe fn test_mm_maskz_min_round_sd() {
59012        let a = _mm_set_pd(0., 1.);
59013        let b = _mm_set_pd(2., 3.);
59014        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59015        let e = _mm_set_pd(0., 0.);
59016        assert_eq_m128d(r, e);
59017        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59018        let e = _mm_set_pd(0., 1.);
59019        assert_eq_m128d(r, e);
59020    }
59021
59022    #[simd_test(enable = "avx512f")]
59023    unsafe fn test_mm_sqrt_round_ss() {
59024        let a = _mm_set_ps(1., 2., 10., 20.);
59025        let b = _mm_set_ps(3., 4., 30., 4.);
59026        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
59027        let e = _mm_set_ps(1., 2., 10., 2.);
59028        assert_eq_m128(r, e);
59029    }
59030
59031    #[simd_test(enable = "avx512f")]
59032    unsafe fn test_mm_mask_sqrt_round_ss() {
59033        let src = _mm_set_ps(10., 11., 100., 110.);
59034        let a = _mm_set_ps(1., 2., 10., 20.);
59035        let b = _mm_set_ps(3., 4., 30., 4.);
59036        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
59037        let e = _mm_set_ps(1., 2., 10., 110.);
59038        assert_eq_m128(r, e);
59039        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
59040            src, 0b11111111, a, b,
59041        );
59042        let e = _mm_set_ps(1., 2., 10., 2.);
59043        assert_eq_m128(r, e);
59044    }
59045
59046    #[simd_test(enable = "avx512f")]
59047    unsafe fn test_mm_maskz_sqrt_round_ss() {
59048        let a = _mm_set_ps(1., 2., 10., 20.);
59049        let b = _mm_set_ps(3., 4., 30., 4.);
59050        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59051        let e = _mm_set_ps(1., 2., 10., 0.);
59052        assert_eq_m128(r, e);
59053        let r =
59054            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59055        let e = _mm_set_ps(1., 2., 10., 2.);
59056        assert_eq_m128(r, e);
59057    }
59058
59059    #[simd_test(enable = "avx512f")]
59060    unsafe fn test_mm_sqrt_round_sd() {
59061        let a = _mm_set_pd(1., 2.);
59062        let b = _mm_set_pd(3., 4.);
59063        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
59064        let e = _mm_set_pd(1., 2.);
59065        assert_eq_m128d(r, e);
59066    }
59067
59068    #[simd_test(enable = "avx512f")]
59069    unsafe fn test_mm_mask_sqrt_round_sd() {
59070        let src = _mm_set_pd(10., 11.);
59071        let a = _mm_set_pd(1., 2.);
59072        let b = _mm_set_pd(3., 4.);
59073        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
59074        let e = _mm_set_pd(1., 11.);
59075        assert_eq_m128d(r, e);
59076        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
59077            src, 0b11111111, a, b,
59078        );
59079        let e = _mm_set_pd(1., 2.);
59080        assert_eq_m128d(r, e);
59081    }
59082
59083    #[simd_test(enable = "avx512f")]
59084    unsafe fn test_mm_maskz_sqrt_round_sd() {
59085        let a = _mm_set_pd(1., 2.);
59086        let b = _mm_set_pd(3., 4.);
59087        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59088        let e = _mm_set_pd(1., 0.);
59089        assert_eq_m128d(r, e);
59090        let r =
59091            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59092        let e = _mm_set_pd(1., 2.);
59093        assert_eq_m128d(r, e);
59094    }
59095
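    // getexp returns floor(log2(|x|)) of the low lane of `b` as a float: getexp(3.0) == 1.0.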
59096    #[simd_test(enable = "avx512f")]
59097    unsafe fn test_mm_getexp_round_ss() {
59098        let a = _mm_set1_ps(2.);
59099        let b = _mm_set1_ps(3.);
59100        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
59101        let e = _mm_set_ps(2., 2., 2., 1.);
59102        assert_eq_m128(r, e);
59103    }
59104
59105    #[simd_test(enable = "avx512f")]
59106    unsafe fn test_mm_mask_getexp_round_ss() {
59107        let a = _mm_set1_ps(2.);
59108        let b = _mm_set1_ps(3.);
59109        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59110        let e = _mm_set_ps(2., 2., 2., 2.);
59111        assert_eq_m128(r, e);
59112        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59113        let e = _mm_set_ps(2., 2., 2., 1.);
59114        assert_eq_m128(r, e);
59115    }
59116
59117    #[simd_test(enable = "avx512f")]
59118    unsafe fn test_mm_maskz_getexp_round_ss() {
59119        let a = _mm_set1_ps(2.);
59120        let b = _mm_set1_ps(3.);
59121        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59122        let e = _mm_set_ps(2., 2., 2., 0.);
59123        assert_eq_m128(r, e);
59124        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59125        let e = _mm_set_ps(2., 2., 2., 1.);
59126        assert_eq_m128(r, e);
59127    }
59128
59129    #[simd_test(enable = "avx512f")]
59130    unsafe fn test_mm_getexp_round_sd() {
59131        let a = _mm_set1_pd(2.);
59132        let b = _mm_set1_pd(3.);
59133        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59134        let e = _mm_set_pd(2., 1.);
59135        assert_eq_m128d(r, e);
59136    }
59137
59138    #[simd_test(enable = "avx512f")]
59139    unsafe fn test_mm_mask_getexp_round_sd() {
59140        let a = _mm_set1_pd(2.);
59141        let b = _mm_set1_pd(3.);
59142        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59143        let e = _mm_set_pd(2., 2.);
59144        assert_eq_m128d(r, e);
59145        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59146        let e = _mm_set_pd(2., 1.);
59147        assert_eq_m128d(r, e);
59148    }
59149
59150    #[simd_test(enable = "avx512f")]
59151    unsafe fn test_mm_maskz_getexp_round_sd() {
59152        let a = _mm_set1_pd(2.);
59153        let b = _mm_set1_pd(3.);
59154        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59155        let e = _mm_set_pd(2., 0.);
59156        assert_eq_m128d(r, e);
59157        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59158        let e = _mm_set_pd(2., 1.);
59159        assert_eq_m128d(r, e);
59160    }
59161
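    // getmant normalizes the low lane of `b` into [1, 2): 10.0 == 1.25 * 2^3, so the
    // extracted mantissa is 1.25.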
59162    #[simd_test(enable = "avx512f")]
59163    unsafe fn test_mm_getmant_round_ss() {
59164        let a = _mm_set1_ps(20.);
59165        let b = _mm_set1_ps(10.);
59166        let r =
59167            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59168                a, b,
59169            );
59170        let e = _mm_set_ps(20., 20., 20., 1.25);
59171        assert_eq_m128(r, e);
59172    }
59173
59174    #[simd_test(enable = "avx512f")]
59175    unsafe fn test_mm_mask_getmant_round_ss() {
59176        let a = _mm_set1_ps(20.);
59177        let b = _mm_set1_ps(10.);
59178        let r = _mm_mask_getmant_round_ss::<
59179            _MM_MANT_NORM_1_2,
59180            _MM_MANT_SIGN_SRC,
59181            _MM_FROUND_CUR_DIRECTION,
59182        >(a, 0, a, b);
59183        let e = _mm_set_ps(20., 20., 20., 20.);
59184        assert_eq_m128(r, e);
59185        let r = _mm_mask_getmant_round_ss::<
59186            _MM_MANT_NORM_1_2,
59187            _MM_MANT_SIGN_SRC,
59188            _MM_FROUND_CUR_DIRECTION,
59189        >(a, 0b11111111, a, b);
59190        let e = _mm_set_ps(20., 20., 20., 1.25);
59191        assert_eq_m128(r, e);
59192    }
59193
59194    #[simd_test(enable = "avx512f")]
59195    unsafe fn test_mm_maskz_getmant_round_ss() {
59196        let a = _mm_set1_ps(20.);
59197        let b = _mm_set1_ps(10.);
59198        let r = _mm_maskz_getmant_round_ss::<
59199            _MM_MANT_NORM_1_2,
59200            _MM_MANT_SIGN_SRC,
59201            _MM_FROUND_CUR_DIRECTION,
59202        >(0, a, b);
59203        let e = _mm_set_ps(20., 20., 20., 0.);
59204        assert_eq_m128(r, e);
59205        let r = _mm_maskz_getmant_round_ss::<
59206            _MM_MANT_NORM_1_2,
59207            _MM_MANT_SIGN_SRC,
59208            _MM_FROUND_CUR_DIRECTION,
59209        >(0b11111111, a, b);
59210        let e = _mm_set_ps(20., 20., 20., 1.25);
59211        assert_eq_m128(r, e);
59212    }
59213
59214    #[simd_test(enable = "avx512f")]
59215    unsafe fn test_mm_getmant_round_sd() {
59216        let a = _mm_set1_pd(20.);
59217        let b = _mm_set1_pd(10.);
59218        let r =
59219            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59220                a, b,
59221            );
59222        let e = _mm_set_pd(20., 1.25);
59223        assert_eq_m128d(r, e);
59224    }
59225
59226    #[simd_test(enable = "avx512f")]
59227    unsafe fn test_mm_mask_getmant_round_sd() {
59228        let a = _mm_set1_pd(20.);
59229        let b = _mm_set1_pd(10.);
59230        let r = _mm_mask_getmant_round_sd::<
59231            _MM_MANT_NORM_1_2,
59232            _MM_MANT_SIGN_SRC,
59233            _MM_FROUND_CUR_DIRECTION,
59234        >(a, 0, a, b);
59235        let e = _mm_set_pd(20., 20.);
59236        assert_eq_m128d(r, e);
59237        let r = _mm_mask_getmant_round_sd::<
59238            _MM_MANT_NORM_1_2,
59239            _MM_MANT_SIGN_SRC,
59240            _MM_FROUND_CUR_DIRECTION,
59241        >(a, 0b11111111, a, b);
59242        let e = _mm_set_pd(20., 1.25);
59243        assert_eq_m128d(r, e);
59244    }
59245
59246    #[simd_test(enable = "avx512f")]
59247    unsafe fn test_mm_maskz_getmant_round_sd() {
59248        let a = _mm_set1_pd(20.);
59249        let b = _mm_set1_pd(10.);
59250        let r = _mm_maskz_getmant_round_sd::<
59251            _MM_MANT_NORM_1_2,
59252            _MM_MANT_SIGN_SRC,
59253            _MM_FROUND_CUR_DIRECTION,
59254        >(0, a, b);
59255        let e = _mm_set_pd(20., 0.);
59256        assert_eq_m128d(r, e);
59257        let r = _mm_maskz_getmant_round_sd::<
59258            _MM_MANT_NORM_1_2,
59259            _MM_MANT_SIGN_SRC,
59260            _MM_FROUND_CUR_DIRECTION,
59261        >(0b11111111, a, b);
59262        let e = _mm_set_pd(20., 1.25);
59263        assert_eq_m128d(r, e);
59264    }
59265
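    // roundscale with IMM8 == 0 rounds to zero fraction bits, i.e. to the nearest integer,
    // so 1.1 becomes 1.0.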
59266    #[simd_test(enable = "avx512f")]
59267    unsafe fn test_mm_roundscale_round_ss() {
59268        let a = _mm_set1_ps(2.2);
59269        let b = _mm_set1_ps(1.1);
59270        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59271        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59272        assert_eq_m128(r, e);
59273    }
59274
59275    #[simd_test(enable = "avx512f")]
59276    unsafe fn test_mm_mask_roundscale_round_ss() {
59277        let a = _mm_set1_ps(2.2);
59278        let b = _mm_set1_ps(1.1);
59279        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59280        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
59281        assert_eq_m128(r, e);
59282        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59283        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59284        assert_eq_m128(r, e);
59285    }
59286
59287    #[simd_test(enable = "avx512f")]
59288    unsafe fn test_mm_maskz_roundscale_round_ss() {
59289        let a = _mm_set1_ps(2.2);
59290        let b = _mm_set1_ps(1.1);
59291        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59292        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
59293        assert_eq_m128(r, e);
59294        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59295        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59296        assert_eq_m128(r, e);
59297    }
59298
59299    #[simd_test(enable = "avx512f")]
59300    unsafe fn test_mm_roundscale_round_sd() {
59301        let a = _mm_set1_pd(2.2);
59302        let b = _mm_set1_pd(1.1);
59303        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59304        let e = _mm_set_pd(2.2, 1.0);
59305        assert_eq_m128d(r, e);
59306    }
59307
59308    #[simd_test(enable = "avx512f")]
59309    unsafe fn test_mm_mask_roundscale_round_sd() {
59310        let a = _mm_set1_pd(2.2);
59311        let b = _mm_set1_pd(1.1);
59312        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59313        let e = _mm_set_pd(2.2, 2.2);
59314        assert_eq_m128d(r, e);
59315        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59316        let e = _mm_set_pd(2.2, 1.0);
59317        assert_eq_m128d(r, e);
59318    }
59319
59320    #[simd_test(enable = "avx512f")]
59321    unsafe fn test_mm_maskz_roundscale_round_sd() {
59322        let a = _mm_set1_pd(2.2);
59323        let b = _mm_set1_pd(1.1);
59324        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59325        let e = _mm_set_pd(2.2, 0.0);
59326        assert_eq_m128d(r, e);
59327        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59328        let e = _mm_set_pd(2.2, 1.0);
59329        assert_eq_m128d(r, e);
59330    }
59331
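    // scalef computes `a * 2^floor(b)` on the low lane: 1.0 * 2^3 == 8.0.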
59332    #[simd_test(enable = "avx512f")]
59333    unsafe fn test_mm_scalef_round_ss() {
59334        let a = _mm_set1_ps(1.);
59335        let b = _mm_set1_ps(3.);
59336        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59337        let e = _mm_set_ps(1., 1., 1., 8.);
59338        assert_eq_m128(r, e);
59339    }
59340
59341    #[simd_test(enable = "avx512f")]
59342    unsafe fn test_mm_mask_scalef_round_ss() {
59343        let a = _mm_set1_ps(1.);
59344        let b = _mm_set1_ps(3.);
59345        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59346            a, 0, a, b,
59347        );
59348        let e = _mm_set_ps(1., 1., 1., 1.);
59349        assert_eq_m128(r, e);
59350        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59351            a, 0b11111111, a, b,
59352        );
59353        let e = _mm_set_ps(1., 1., 1., 8.);
59354        assert_eq_m128(r, e);
59355    }
59356
59357    #[simd_test(enable = "avx512f")]
59358    unsafe fn test_mm_maskz_scalef_round_ss() {
59359        let a = _mm_set1_ps(1.);
59360        let b = _mm_set1_ps(3.);
59361        let r =
59362            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59363        let e = _mm_set_ps(1., 1., 1., 0.);
59364        assert_eq_m128(r, e);
59365        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59366            0b11111111, a, b,
59367        );
59368        let e = _mm_set_ps(1., 1., 1., 8.);
59369        assert_eq_m128(r, e);
59370    }
59371
59372    #[simd_test(enable = "avx512f")]
59373    unsafe fn test_mm_scalef_round_sd() {
59374        let a = _mm_set1_pd(1.);
59375        let b = _mm_set1_pd(3.);
59376        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59377        let e = _mm_set_pd(1., 8.);
59378        assert_eq_m128d(r, e);
59379    }
59380
59381    #[simd_test(enable = "avx512f")]
59382    unsafe fn test_mm_mask_scalef_round_sd() {
59383        let a = _mm_set1_pd(1.);
59384        let b = _mm_set1_pd(3.);
59385        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59386            a, 0, a, b,
59387        );
59388        let e = _mm_set_pd(1., 1.);
59389        assert_eq_m128d(r, e);
59390        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59391            a, 0b11111111, a, b,
59392        );
59393        let e = _mm_set_pd(1., 8.);
59394        assert_eq_m128d(r, e);
59395    }
59396
59397    #[simd_test(enable = "avx512f")]
59398    unsafe fn test_mm_maskz_scalef_round_sd() {
59399        let a = _mm_set1_pd(1.);
59400        let b = _mm_set1_pd(3.);
59401        let r =
59402            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59403        let e = _mm_set_pd(1., 0.);
59404        assert_eq_m128d(r, e);
59405        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59406            0b11111111, a, b,
59407        );
59408        let e = _mm_set_pd(1., 8.);
59409        assert_eq_m128d(r, e);
59410    }
59411
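    // Scalar FMA family on the low lane: fmadd = a*b + c, fmsub = a*b - c, fnmadd = -(a*b) + c,
    // fnmsub = -(a*b) - c. With a = 1, b = 2, c = 3 the expected low lanes are 5, -1, 1 and -5.
    // The mask3 variants accumulate into `c`, so their upper (and masked-off) lanes come from `c`.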
59412    #[simd_test(enable = "avx512f")]
59413    unsafe fn test_mm_fmadd_round_ss() {
59414        let a = _mm_set1_ps(1.);
59415        let b = _mm_set1_ps(2.);
59416        let c = _mm_set1_ps(3.);
59417        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59418        let e = _mm_set_ps(1., 1., 1., 5.);
59419        assert_eq_m128(r, e);
59420    }
59421
59422    #[simd_test(enable = "avx512f")]
59423    unsafe fn test_mm_mask_fmadd_round_ss() {
59424        let a = _mm_set1_ps(1.);
59425        let b = _mm_set1_ps(2.);
59426        let c = _mm_set1_ps(3.);
59427        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59428            a, 0, b, c,
59429        );
59430        assert_eq_m128(r, a);
59431        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59432            a, 0b11111111, b, c,
59433        );
59434        let e = _mm_set_ps(1., 1., 1., 5.);
59435        assert_eq_m128(r, e);
59436    }
59437
59438    #[simd_test(enable = "avx512f")]
59439    unsafe fn test_mm_maskz_fmadd_round_ss() {
59440        let a = _mm_set1_ps(1.);
59441        let b = _mm_set1_ps(2.);
59442        let c = _mm_set1_ps(3.);
59443        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59444            0, a, b, c,
59445        );
59446        let e = _mm_set_ps(1., 1., 1., 0.);
59447        assert_eq_m128(r, e);
59448        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59449            0b11111111, a, b, c,
59450        );
59451        let e = _mm_set_ps(1., 1., 1., 5.);
59452        assert_eq_m128(r, e);
59453    }
59454
59455    #[simd_test(enable = "avx512f")]
59456    unsafe fn test_mm_mask3_fmadd_round_ss() {
59457        let a = _mm_set1_ps(1.);
59458        let b = _mm_set1_ps(2.);
59459        let c = _mm_set1_ps(3.);
59460        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59461            a, b, c, 0,
59462        );
59463        assert_eq_m128(r, c);
59464        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59465            a, b, c, 0b11111111,
59466        );
59467        let e = _mm_set_ps(3., 3., 3., 5.);
59468        assert_eq_m128(r, e);
59469    }
59470
59471    #[simd_test(enable = "avx512f")]
59472    unsafe fn test_mm_fmadd_round_sd() {
59473        let a = _mm_set1_pd(1.);
59474        let b = _mm_set1_pd(2.);
59475        let c = _mm_set1_pd(3.);
59476        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59477        let e = _mm_set_pd(1., 5.);
59478        assert_eq_m128d(r, e);
59479    }
59480
59481    #[simd_test(enable = "avx512f")]
59482    unsafe fn test_mm_mask_fmadd_round_sd() {
59483        let a = _mm_set1_pd(1.);
59484        let b = _mm_set1_pd(2.);
59485        let c = _mm_set1_pd(3.);
59486        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59487            a, 0, b, c,
59488        );
59489        assert_eq_m128d(r, a);
59490        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59491            a, 0b11111111, b, c,
59492        );
59493        let e = _mm_set_pd(1., 5.);
59494        assert_eq_m128d(r, e);
59495    }
59496
59497    #[simd_test(enable = "avx512f")]
59498    unsafe fn test_mm_maskz_fmadd_round_sd() {
59499        let a = _mm_set1_pd(1.);
59500        let b = _mm_set1_pd(2.);
59501        let c = _mm_set1_pd(3.);
59502        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59503            0, a, b, c,
59504        );
59505        let e = _mm_set_pd(1., 0.);
59506        assert_eq_m128d(r, e);
59507        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59508            0b11111111, a, b, c,
59509        );
59510        let e = _mm_set_pd(1., 5.);
59511        assert_eq_m128d(r, e);
59512    }
59513
59514    #[simd_test(enable = "avx512f")]
59515    unsafe fn test_mm_mask3_fmadd_round_sd() {
59516        let a = _mm_set1_pd(1.);
59517        let b = _mm_set1_pd(2.);
59518        let c = _mm_set1_pd(3.);
59519        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59520            a, b, c, 0,
59521        );
59522        assert_eq_m128d(r, c);
59523        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59524            a, b, c, 0b11111111,
59525        );
59526        let e = _mm_set_pd(3., 5.);
59527        assert_eq_m128d(r, e);
59528    }
59529
59530    #[simd_test(enable = "avx512f")]
59531    unsafe fn test_mm_fmsub_round_ss() {
59532        let a = _mm_set1_ps(1.);
59533        let b = _mm_set1_ps(2.);
59534        let c = _mm_set1_ps(3.);
59535        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59536        let e = _mm_set_ps(1., 1., 1., -1.);
59537        assert_eq_m128(r, e);
59538    }
59539
59540    #[simd_test(enable = "avx512f")]
59541    unsafe fn test_mm_mask_fmsub_round_ss() {
59542        let a = _mm_set1_ps(1.);
59543        let b = _mm_set1_ps(2.);
59544        let c = _mm_set1_ps(3.);
59545        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59546            a, 0, b, c,
59547        );
59548        assert_eq_m128(r, a);
59549        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59550            a, 0b11111111, b, c,
59551        );
59552        let e = _mm_set_ps(1., 1., 1., -1.);
59553        assert_eq_m128(r, e);
59554    }
59555
59556    #[simd_test(enable = "avx512f")]
59557    unsafe fn test_mm_maskz_fmsub_round_ss() {
59558        let a = _mm_set1_ps(1.);
59559        let b = _mm_set1_ps(2.);
59560        let c = _mm_set1_ps(3.);
59561        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59562            0, a, b, c,
59563        );
59564        let e = _mm_set_ps(1., 1., 1., 0.);
59565        assert_eq_m128(r, e);
59566        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59567            0b11111111, a, b, c,
59568        );
59569        let e = _mm_set_ps(1., 1., 1., -1.);
59570        assert_eq_m128(r, e);
59571    }
59572
59573    #[simd_test(enable = "avx512f")]
59574    unsafe fn test_mm_mask3_fmsub_round_ss() {
59575        let a = _mm_set1_ps(1.);
59576        let b = _mm_set1_ps(2.);
59577        let c = _mm_set1_ps(3.);
59578        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59579            a, b, c, 0,
59580        );
59581        assert_eq_m128(r, c);
59582        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59583            a, b, c, 0b11111111,
59584        );
59585        let e = _mm_set_ps(3., 3., 3., -1.);
59586        assert_eq_m128(r, e);
59587    }
59588
59589    #[simd_test(enable = "avx512f")]
59590    unsafe fn test_mm_fmsub_round_sd() {
59591        let a = _mm_set1_pd(1.);
59592        let b = _mm_set1_pd(2.);
59593        let c = _mm_set1_pd(3.);
59594        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59595        let e = _mm_set_pd(1., -1.);
59596        assert_eq_m128d(r, e);
59597    }
59598
59599    #[simd_test(enable = "avx512f")]
59600    unsafe fn test_mm_mask_fmsub_round_sd() {
59601        let a = _mm_set1_pd(1.);
59602        let b = _mm_set1_pd(2.);
59603        let c = _mm_set1_pd(3.);
59604        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59605            a, 0, b, c,
59606        );
59607        assert_eq_m128d(r, a);
59608        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59609            a, 0b11111111, b, c,
59610        );
59611        let e = _mm_set_pd(1., -1.);
59612        assert_eq_m128d(r, e);
59613    }
59614
59615    #[simd_test(enable = "avx512f")]
59616    unsafe fn test_mm_maskz_fmsub_round_sd() {
59617        let a = _mm_set1_pd(1.);
59618        let b = _mm_set1_pd(2.);
59619        let c = _mm_set1_pd(3.);
59620        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59621            0, a, b, c,
59622        );
59623        let e = _mm_set_pd(1., 0.);
59624        assert_eq_m128d(r, e);
59625        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59626            0b11111111, a, b, c,
59627        );
59628        let e = _mm_set_pd(1., -1.);
59629        assert_eq_m128d(r, e);
59630    }
59631
59632    #[simd_test(enable = "avx512f")]
59633    unsafe fn test_mm_mask3_fmsub_round_sd() {
59634        let a = _mm_set1_pd(1.);
59635        let b = _mm_set1_pd(2.);
59636        let c = _mm_set1_pd(3.);
59637        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59638            a, b, c, 0,
59639        );
59640        assert_eq_m128d(r, c);
59641        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59642            a, b, c, 0b11111111,
59643        );
59644        let e = _mm_set_pd(3., -1.);
59645        assert_eq_m128d(r, e);
59646    }
59647
59648    #[simd_test(enable = "avx512f")]
59649    unsafe fn test_mm_fnmadd_round_ss() {
59650        let a = _mm_set1_ps(1.);
59651        let b = _mm_set1_ps(2.);
59652        let c = _mm_set1_ps(3.);
59653        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59654        let e = _mm_set_ps(1., 1., 1., 1.);
59655        assert_eq_m128(r, e);
59656    }
59657
59658    #[simd_test(enable = "avx512f")]
59659    unsafe fn test_mm_mask_fnmadd_round_ss() {
59660        let a = _mm_set1_ps(1.);
59661        let b = _mm_set1_ps(2.);
59662        let c = _mm_set1_ps(3.);
59663        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59664            a, 0, b, c,
59665        );
59666        assert_eq_m128(r, a);
59667        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59668            a, 0b11111111, b, c,
59669        );
59670        let e = _mm_set_ps(1., 1., 1., 1.);
59671        assert_eq_m128(r, e);
59672    }
59673
59674    #[simd_test(enable = "avx512f")]
59675    unsafe fn test_mm_maskz_fnmadd_round_ss() {
59676        let a = _mm_set1_ps(1.);
59677        let b = _mm_set1_ps(2.);
59678        let c = _mm_set1_ps(3.);
59679        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59680            0, a, b, c,
59681        );
59682        let e = _mm_set_ps(1., 1., 1., 0.);
59683        assert_eq_m128(r, e);
59684        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59685            0b11111111, a, b, c,
59686        );
59687        let e = _mm_set_ps(1., 1., 1., 1.);
59688        assert_eq_m128(r, e);
59689    }
59690
59691    #[simd_test(enable = "avx512f")]
59692    unsafe fn test_mm_mask3_fnmadd_round_ss() {
59693        let a = _mm_set1_ps(1.);
59694        let b = _mm_set1_ps(2.);
59695        let c = _mm_set1_ps(3.);
59696        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59697            a, b, c, 0,
59698        );
59699        assert_eq_m128(r, c);
59700        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59701            a, b, c, 0b11111111,
59702        );
59703        let e = _mm_set_ps(3., 3., 3., 1.);
59704        assert_eq_m128(r, e);
59705    }
59706
59707    #[simd_test(enable = "avx512f")]
59708    unsafe fn test_mm_fnmadd_round_sd() {
59709        let a = _mm_set1_pd(1.);
59710        let b = _mm_set1_pd(2.);
59711        let c = _mm_set1_pd(3.);
59712        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59713        let e = _mm_set_pd(1., 1.);
59714        assert_eq_m128d(r, e);
59715    }
59716
59717    #[simd_test(enable = "avx512f")]
59718    unsafe fn test_mm_mask_fnmadd_round_sd() {
59719        let a = _mm_set1_pd(1.);
59720        let b = _mm_set1_pd(2.);
59721        let c = _mm_set1_pd(3.);
59722        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59723            a, 0, b, c,
59724        );
59725        assert_eq_m128d(r, a);
59726        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59727            a, 0b11111111, b, c,
59728        );
59729        let e = _mm_set_pd(1., 1.);
59730        assert_eq_m128d(r, e);
59731    }
59732
59733    #[simd_test(enable = "avx512f")]
59734    unsafe fn test_mm_maskz_fnmadd_round_sd() {
59735        let a = _mm_set1_pd(1.);
59736        let b = _mm_set1_pd(2.);
59737        let c = _mm_set1_pd(3.);
59738        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59739            0, a, b, c,
59740        );
59741        let e = _mm_set_pd(1., 0.);
59742        assert_eq_m128d(r, e);
59743        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59744            0b11111111, a, b, c,
59745        );
59746        let e = _mm_set_pd(1., 1.);
59747        assert_eq_m128d(r, e);
59748    }
59749
59750    #[simd_test(enable = "avx512f")]
59751    unsafe fn test_mm_mask3_fnmadd_round_sd() {
59752        let a = _mm_set1_pd(1.);
59753        let b = _mm_set1_pd(2.);
59754        let c = _mm_set1_pd(3.);
59755        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59756            a, b, c, 0,
59757        );
59758        assert_eq_m128d(r, c);
59759        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59760            a, b, c, 0b11111111,
59761        );
59762        let e = _mm_set_pd(3., 1.);
59763        assert_eq_m128d(r, e);
59764    }
59765
59766    #[simd_test(enable = "avx512f")]
59767    unsafe fn test_mm_fnmsub_round_ss() {
59768        let a = _mm_set1_ps(1.);
59769        let b = _mm_set1_ps(2.);
59770        let c = _mm_set1_ps(3.);
59771        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59772        let e = _mm_set_ps(1., 1., 1., -5.);
59773        assert_eq_m128(r, e);
59774    }
59775
59776    #[simd_test(enable = "avx512f")]
59777    unsafe fn test_mm_mask_fnmsub_round_ss() {
59778        let a = _mm_set1_ps(1.);
59779        let b = _mm_set1_ps(2.);
59780        let c = _mm_set1_ps(3.);
59781        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59782            a, 0, b, c,
59783        );
59784        assert_eq_m128(r, a);
59785        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59786            a, 0b11111111, b, c,
59787        );
59788        let e = _mm_set_ps(1., 1., 1., -5.);
59789        assert_eq_m128(r, e);
59790    }
59791
59792    #[simd_test(enable = "avx512f")]
59793    unsafe fn test_mm_maskz_fnmsub_round_ss() {
59794        let a = _mm_set1_ps(1.);
59795        let b = _mm_set1_ps(2.);
59796        let c = _mm_set1_ps(3.);
59797        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59798            0, a, b, c,
59799        );
59800        let e = _mm_set_ps(1., 1., 1., 0.);
59801        assert_eq_m128(r, e);
59802        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59803            0b11111111, a, b, c,
59804        );
59805        let e = _mm_set_ps(1., 1., 1., -5.);
59806        assert_eq_m128(r, e);
59807    }
59808
59809    #[simd_test(enable = "avx512f")]
59810    unsafe fn test_mm_mask3_fnmsub_round_ss() {
59811        let a = _mm_set1_ps(1.);
59812        let b = _mm_set1_ps(2.);
59813        let c = _mm_set1_ps(3.);
59814        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59815            a, b, c, 0,
59816        );
59817        assert_eq_m128(r, c);
59818        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59819            a, b, c, 0b11111111,
59820        );
59821        let e = _mm_set_ps(3., 3., 3., -5.);
59822        assert_eq_m128(r, e);
59823    }
59824
59825    #[simd_test(enable = "avx512f")]
59826    unsafe fn test_mm_fnmsub_round_sd() {
59827        let a = _mm_set1_pd(1.);
59828        let b = _mm_set1_pd(2.);
59829        let c = _mm_set1_pd(3.);
59830        let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59831        let e = _mm_set_pd(1., -5.);
59832        assert_eq_m128d(r, e);
59833    }
59834
59835    #[simd_test(enable = "avx512f")]
59836    unsafe fn test_mm_mask_fnmsub_round_sd() {
59837        let a = _mm_set1_pd(1.);
59838        let b = _mm_set1_pd(2.);
59839        let c = _mm_set1_pd(3.);
59840        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59841            a, 0, b, c,
59842        );
59843        assert_eq_m128d(r, a);
59844        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59845            a, 0b11111111, b, c,
59846        );
59847        let e = _mm_set_pd(1., -5.);
59848        assert_eq_m128d(r, e);
59849    }
59850
59851    #[simd_test(enable = "avx512f")]
59852    unsafe fn test_mm_maskz_fnmsub_round_sd() {
59853        let a = _mm_set1_pd(1.);
59854        let b = _mm_set1_pd(2.);
59855        let c = _mm_set1_pd(3.);
59856        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59857            0, a, b, c,
59858        );
59859        let e = _mm_set_pd(1., 0.);
59860        assert_eq_m128d(r, e);
59861        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59862            0b11111111, a, b, c,
59863        );
59864        let e = _mm_set_pd(1., -5.);
59865        assert_eq_m128d(r, e);
59866    }
59867
59868    #[simd_test(enable = "avx512f")]
59869    unsafe fn test_mm_mask3_fnmsub_round_sd() {
59870        let a = _mm_set1_pd(1.);
59871        let b = _mm_set1_pd(2.);
59872        let c = _mm_set1_pd(3.);
59873        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59874            a, b, c, 0,
59875        );
59876        assert_eq_m128d(r, c);
59877        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59878            a, b, c, 0b11111111,
59879        );
59880        let e = _mm_set_pd(3., -5.);
59881        assert_eq_m128d(r, e);
59882    }
59883
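    // fixupimm classifies the low lane of `b` and replaces the result's low lane with the value
    // selected by the matching 4-bit field of the table in `c` (here -0.0); upper lanes come from `a`.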
59884    #[simd_test(enable = "avx512f")]
59885    unsafe fn test_mm_fixupimm_ss() {
59886        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59887        let b = _mm_set1_ps(f32::MAX);
59888        let c = _mm_set1_epi32(i32::MAX);
59889        let r = _mm_fixupimm_ss::<5>(a, b, c);
59890        let e = _mm_set_ps(0., 0., 0., -0.0);
59891        assert_eq_m128(r, e);
59892    }
59893
59894    #[simd_test(enable = "avx512f")]
59895    unsafe fn test_mm_mask_fixupimm_ss() {
59896        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59897        let b = _mm_set1_ps(f32::MAX);
59898        let c = _mm_set1_epi32(i32::MAX);
59899        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
59900        let e = _mm_set_ps(0., 0., 0., -0.0);
59901        assert_eq_m128(r, e);
59902    }
59903
59904    #[simd_test(enable = "avx512f")]
59905    unsafe fn test_mm_maskz_fixupimm_ss() {
59906        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59907        let b = _mm_set1_ps(f32::MAX);
59908        let c = _mm_set1_epi32(i32::MAX);
59909        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
59910        let e = _mm_set_ps(0., 0., 0., 0.0);
59911        assert_eq_m128(r, e);
59912        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
59913        let e = _mm_set_ps(0., 0., 0., -0.0);
59914        assert_eq_m128(r, e);
59915    }
59916
59917    #[simd_test(enable = "avx512f")]
59918    unsafe fn test_mm_fixupimm_sd() {
59919        let a = _mm_set_pd(0., f64::NAN);
59920        let b = _mm_set1_pd(f64::MAX);
59921        let c = _mm_set1_epi64x(i32::MAX as i64);
59922        let r = _mm_fixupimm_sd::<5>(a, b, c);
59923        let e = _mm_set_pd(0., -0.0);
59924        assert_eq_m128d(r, e);
59925    }
59926
59927    #[simd_test(enable = "avx512f")]
59928    unsafe fn test_mm_mask_fixupimm_sd() {
59929        let a = _mm_set_pd(0., f64::NAN);
59930        let b = _mm_set1_pd(f64::MAX);
59931        let c = _mm_set1_epi64x(i32::MAX as i64);
59932        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
59933        let e = _mm_set_pd(0., -0.0);
59934        assert_eq_m128d(r, e);
59935    }
59936
59937    #[simd_test(enable = "avx512f")]
59938    unsafe fn test_mm_maskz_fixupimm_sd() {
59939        let a = _mm_set_pd(0., f64::NAN);
59940        let b = _mm_set1_pd(f64::MAX);
59941        let c = _mm_set1_epi64x(i32::MAX as i64);
59942        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
59943        let e = _mm_set_pd(0., 0.0);
59944        assert_eq_m128d(r, e);
59945        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
59946        let e = _mm_set_pd(0., -0.0);
59947        assert_eq_m128d(r, e);
59948    }
59949
59950    #[simd_test(enable = "avx512f")]
59951    unsafe fn test_mm_fixupimm_round_ss() {
59952        let a = _mm_set_ps(1., 0., 0., f32::NAN);
59953        let b = _mm_set1_ps(f32::MAX);
59954        let c = _mm_set1_epi32(i32::MAX);
59955        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59956        let e = _mm_set_ps(1., 0., 0., -0.0);
59957        assert_eq_m128(r, e);
59958    }
59959
59960    #[simd_test(enable = "avx512f")]
59961    unsafe fn test_mm_mask_fixupimm_round_ss() {
59962        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59963        let b = _mm_set1_ps(f32::MAX);
59964        let c = _mm_set1_epi32(i32::MAX);
59965        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59966        let e = _mm_set_ps(0., 0., 0., -0.0);
59967        assert_eq_m128(r, e);
59968    }
59969
59970    #[simd_test(enable = "avx512f")]
59971    unsafe fn test_mm_maskz_fixupimm_round_ss() {
59972        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59973        let b = _mm_set1_ps(f32::MAX);
59974        let c = _mm_set1_epi32(i32::MAX);
59975        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59976        let e = _mm_set_ps(0., 0., 0., 0.0);
59977        assert_eq_m128(r, e);
59978        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59979        let e = _mm_set_ps(0., 0., 0., -0.0);
59980        assert_eq_m128(r, e);
59981    }
59982
59983    #[simd_test(enable = "avx512f")]
59984    unsafe fn test_mm_fixupimm_round_sd() {
59985        let a = _mm_set_pd(0., f64::NAN);
59986        let b = _mm_set1_pd(f64::MAX);
59987        let c = _mm_set1_epi64x(i32::MAX as i64);
59988        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59989        let e = _mm_set_pd(0., -0.0);
59990        assert_eq_m128d(r, e);
59991    }
59992
59993    #[simd_test(enable = "avx512f")]
59994    unsafe fn test_mm_mask_fixupimm_round_sd() {
59995        let a = _mm_set_pd(0., f64::NAN);
59996        let b = _mm_set1_pd(f64::MAX);
59997        let c = _mm_set1_epi64x(i32::MAX as i64);
59998        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59999        let e = _mm_set_pd(0., -0.0);
60000        assert_eq_m128d(r, e);
60001    }
60002
60003    #[simd_test(enable = "avx512f")]
60004    unsafe fn test_mm_maskz_fixupimm_round_sd() {
60005        let a = _mm_set_pd(0., f64::NAN);
60006        let b = _mm_set1_pd(f64::MAX);
60007        let c = _mm_set1_epi64x(i32::MAX as i64);
60008        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
60009        let e = _mm_set_pd(0., 0.0);
60010        assert_eq_m128d(r, e);
60011        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
60012        let e = _mm_set_pd(0., -0.0);
60013        assert_eq_m128d(r, e);
60014    }
60015
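    // Masked scalar conversions: the low f32 of `b` is widened to f64 (cvtss_sd) or the low f64
    // narrowed to f32 (cvtsd_ss); a clear mask bit leaves the low lane as `src` or zero instead.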
60016    #[simd_test(enable = "avx512f")]
60017    unsafe fn test_mm_mask_cvtss_sd() {
60018        let a = _mm_set_pd(6., -7.5);
60019        let b = _mm_set_ps(0., -0.5, 1., -1.5);
60020        let r = _mm_mask_cvtss_sd(a, 0, a, b);
60021        assert_eq_m128d(r, a);
60022        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
60023        let e = _mm_set_pd(6., -1.5);
60024        assert_eq_m128d(r, e);
60025    }
60026
60027    #[simd_test(enable = "avx512f")]
60028    unsafe fn test_mm_maskz_cvtss_sd() {
60029        let a = _mm_set_pd(6., -7.5);
60030        let b = _mm_set_ps(0., -0.5, 1., -1.5);
60031        let r = _mm_maskz_cvtss_sd(0, a, b);
60032        let e = _mm_set_pd(6., 0.);
60033        assert_eq_m128d(r, e);
60034        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
60035        let e = _mm_set_pd(6., -1.5);
60036        assert_eq_m128d(r, e);
60037    }
60038
60039    #[simd_test(enable = "avx512f")]
60040    unsafe fn test_mm_mask_cvtsd_ss() {
60041        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60042        let b = _mm_set_pd(6., -7.5);
60043        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
60044        assert_eq_m128(r, a);
60045        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
60046        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60047        assert_eq_m128(r, e);
60048    }
60049
60050    #[simd_test(enable = "avx512f")]
60051    unsafe fn test_mm_maskz_cvtsd_ss() {
60052        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60053        let b = _mm_set_pd(6., -7.5);
60054        let r = _mm_maskz_cvtsd_ss(0, a, b);
60055        let e = _mm_set_ps(0., -0.5, 1., 0.);
60056        assert_eq_m128(r, e);
60057        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
60058        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60059        assert_eq_m128(r, e);
60060    }
60061
60062    #[simd_test(enable = "avx512f")]
60063    unsafe fn test_mm_cvt_roundss_sd() {
60064        let a = _mm_set_pd(6., -7.5);
60065        let b = _mm_set_ps(0., -0.5, 1., -1.5);
60066        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
60067        let e = _mm_set_pd(6., -1.5);
60068        assert_eq_m128d(r, e);
60069    }
60070
60071    #[simd_test(enable = "avx512f")]
60072    unsafe fn test_mm_mask_cvt_roundss_sd() {
60073        let a = _mm_set_pd(6., -7.5);
60074        let b = _mm_set_ps(0., -0.5, 1., -1.5);
60075        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
60076        assert_eq_m128d(r, a);
60077        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60078        let e = _mm_set_pd(6., -1.5);
60079        assert_eq_m128d(r, e);
60080    }
60081
60082    #[simd_test(enable = "avx512f")]
60083    unsafe fn test_mm_maskz_cvt_roundss_sd() {
60084        let a = _mm_set_pd(6., -7.5);
60085        let b = _mm_set_ps(0., -0.5, 1., -1.5);
60086        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60087        let e = _mm_set_pd(6., 0.);
60088        assert_eq_m128d(r, e);
60089        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60090        let e = _mm_set_pd(6., -1.5);
60091        assert_eq_m128d(r, e);
60092    }
60093
60094    #[simd_test(enable = "avx512f")]
60095    unsafe fn test_mm_cvt_roundsd_ss() {
60096        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60097        let b = _mm_set_pd(6., -7.5);
60098        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60099        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60100        assert_eq_m128(r, e);
60101    }
60102
60103    #[simd_test(enable = "avx512f")]
60104    unsafe fn test_mm_mask_cvt_roundsd_ss() {
60105        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60106        let b = _mm_set_pd(6., -7.5);
60107        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
60108        assert_eq_m128(r, a);
60109        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60110            a, 0b11111111, a, b,
60111        );
60112        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60113        assert_eq_m128(r, e);
60114    }
60115
60116    #[simd_test(enable = "avx512f")]
60117    unsafe fn test_mm_maskz_cvt_roundsd_ss() {
60118        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60119        let b = _mm_set_pd(6., -7.5);
60120        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60121        let e = _mm_set_ps(0., -0.5, 1., 0.);
60122        assert_eq_m128(r, e);
60123        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60124            0b11111111, a, b,
60125        );
60126        let e = _mm_set_ps(0., -0.5, 1., -7.5);
60127        assert_eq_m128(r, e);
60128    }
60129
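    // `_MM_FROUND_TO_ZERO` truncates, so the low lane -1.5 converts to -1.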
60130    #[simd_test(enable = "avx512f")]
60131    unsafe fn test_mm_cvt_roundss_si32() {
60132        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60133        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60134        let e: i32 = -1;
60135        assert_eq!(r, e);
60136    }
60137
60138    #[simd_test(enable = "avx512f")]
60139    unsafe fn test_mm_cvt_roundss_i32() {
60140        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60141        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60142        let e: i32 = -1;
60143        assert_eq!(r, e);
60144    }
60145
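    // A negative source cannot be represented as u32; the invalid conversion returns the
    // unsigned integer indefinite value, u32::MAX.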
60146    #[simd_test(enable = "avx512f")]
60147    unsafe fn test_mm_cvt_roundss_u32() {
60148        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60149        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60150        let e: u32 = u32::MAX;
60151        assert_eq!(r, e);
60152    }
60153
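    // With the default round-to-nearest-even mode, -1.5 converts to -2.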
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

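    // 9 is exactly representable in both f32 and f64, so the chosen rounding
    // mode does not affect these integer-to-float conversions.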
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvti32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvti32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundss_si32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtu32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }

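    // Comparison predicate 0 is _CMP_EQ_OQ: 2.2 == 1.1 is false, so the
    // comparison returns 0.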
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_comi_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_comi_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtsi512_si32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvtsi512_si32(a);
        let e: i32 = 1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtss_f32() {
        let a = _mm512_setr_ps(
            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtsd_f64() {
        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
        assert_eq!(r, -1.1);
    }

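    // In _mm512_shuffle_pd each control bit selects the low (0) or high (1)
    // element within a 128-bit lane; even destination elements come from `a`,
    // odd ones from `b`. With all bits set, every high element is chosen.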
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m512d(r, a);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
        assert_eq_m512d(r, _mm512_setzero_pd());
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

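    // Expand loads read consecutive elements from memory and place them into
    // the destination lanes whose mask bit is set; the remaining lanes are
    // copied from `src` (mask variants) or zeroed (maskz variants).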
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm_set_epi32(1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm_set_epi32(1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm256_set_epi64x(1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm256_set_epi64x(1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

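    // Only the low two mask bits apply to a 128-bit epi64 vector; both are
    // clear in 0b11101000, so nothing is loaded and the lanes come entirely
    // from `src` (mask) or are zeroed (maskz).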
    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi64() {
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm_set_epi64x(42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm_set_epi64x(0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_ps() {
        let src = _mm512_set1_ps(42.);
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
}