1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
13#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub unsafe fn _mm_pause() {
23 pause()
26}
27
28#[inline]
33#[target_feature(enable = "sse2")]
34#[cfg_attr(test, assert_instr(clflush))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36pub unsafe fn _mm_clflush(p: *const u8) {
37 clflush(p)
38}
39
40#[inline]
49#[target_feature(enable = "sse2")]
50#[cfg_attr(test, assert_instr(lfence))]
51#[stable(feature = "simd_x86", since = "1.27.0")]
52pub unsafe fn _mm_lfence() {
53 lfence()
54}
55
56#[inline]
65#[target_feature(enable = "sse2")]
66#[cfg_attr(test, assert_instr(mfence))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub unsafe fn _mm_mfence() {
69 mfence()
70}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
81}
82
83#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
92}
93
94#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
103}
104
105#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
114}
115
116#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
125}
126
127#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
147}
148
149#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
158}
159
160#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168 unsafe {
169 let a = simd_cast::<_, u16x16>(a.as_u8x16());
170 let b = simd_cast::<_, u16x16>(b.as_u8x16());
171 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
172 transmute(simd_cast::<_, u8x16>(r))
173 }
174}
175
176#[inline]
180#[target_feature(enable = "sse2")]
181#[cfg_attr(test, assert_instr(pavgw))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
184 unsafe {
185 let a = simd_cast::<_, u32x8>(a.as_u16x8());
186 let b = simd_cast::<_, u32x8>(b.as_u16x8());
187 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
188 transmute(simd_cast::<_, u16x8>(r))
189 }
190}
191
192#[inline]
200#[target_feature(enable = "sse2")]
201#[cfg_attr(test, assert_instr(pmaddwd))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204 unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
205}
206
207#[inline]
212#[target_feature(enable = "sse2")]
213#[cfg_attr(test, assert_instr(pmaxsw))]
214#[stable(feature = "simd_x86", since = "1.27.0")]
215pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
216 unsafe {
217 let a = a.as_i16x8();
218 let b = b.as_i16x8();
219 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
220 }
221}
222
223#[inline]
228#[target_feature(enable = "sse2")]
229#[cfg_attr(test, assert_instr(pmaxub))]
230#[stable(feature = "simd_x86", since = "1.27.0")]
231pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
232 unsafe {
233 let a = a.as_u8x16();
234 let b = b.as_u8x16();
235 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
236 }
237}
238
239#[inline]
244#[target_feature(enable = "sse2")]
245#[cfg_attr(test, assert_instr(pminsw))]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
248 unsafe {
249 let a = a.as_i16x8();
250 let b = b.as_i16x8();
251 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
252 }
253}
254
255#[inline]
260#[target_feature(enable = "sse2")]
261#[cfg_attr(test, assert_instr(pminub))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
264 unsafe {
265 let a = a.as_u8x16();
266 let b = b.as_u8x16();
267 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
268 }
269}
270
271#[inline]
278#[target_feature(enable = "sse2")]
279#[cfg_attr(test, assert_instr(pmulhw))]
280#[stable(feature = "simd_x86", since = "1.27.0")]
281pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
282 unsafe {
283 let a = simd_cast::<_, i32x8>(a.as_i16x8());
284 let b = simd_cast::<_, i32x8>(b.as_i16x8());
285 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
286 transmute(simd_cast::<i32x8, i16x8>(r))
287 }
288}
289
290#[inline]
297#[target_feature(enable = "sse2")]
298#[cfg_attr(test, assert_instr(pmulhuw))]
299#[stable(feature = "simd_x86", since = "1.27.0")]
300pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
301 unsafe {
302 let a = simd_cast::<_, u32x8>(a.as_u16x8());
303 let b = simd_cast::<_, u32x8>(b.as_u16x8());
304 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
305 transmute(simd_cast::<u32x8, u16x8>(r))
306 }
307}
308
309#[inline]
316#[target_feature(enable = "sse2")]
317#[cfg_attr(test, assert_instr(pmullw))]
318#[stable(feature = "simd_x86", since = "1.27.0")]
319pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
320 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
321}
322
323#[inline]
330#[target_feature(enable = "sse2")]
331#[cfg_attr(test, assert_instr(pmuludq))]
332#[stable(feature = "simd_x86", since = "1.27.0")]
333pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
334 unsafe {
335 let a = a.as_u64x2();
336 let b = b.as_u64x2();
337 let mask = u64x2::splat(u32::MAX.into());
338 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
339 }
340}
341
342#[inline]
351#[target_feature(enable = "sse2")]
352#[cfg_attr(test, assert_instr(psadbw))]
353#[stable(feature = "simd_x86", since = "1.27.0")]
354pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
355 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
356}
357
358#[inline]
362#[target_feature(enable = "sse2")]
363#[cfg_attr(test, assert_instr(psubb))]
364#[stable(feature = "simd_x86", since = "1.27.0")]
365pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
366 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
367}
368
369#[inline]
373#[target_feature(enable = "sse2")]
374#[cfg_attr(test, assert_instr(psubw))]
375#[stable(feature = "simd_x86", since = "1.27.0")]
376pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
377 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
378}
379
380#[inline]
384#[target_feature(enable = "sse2")]
385#[cfg_attr(test, assert_instr(psubd))]
386#[stable(feature = "simd_x86", since = "1.27.0")]
387pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
388 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
389}
390
391#[inline]
395#[target_feature(enable = "sse2")]
396#[cfg_attr(test, assert_instr(psubq))]
397#[stable(feature = "simd_x86", since = "1.27.0")]
398pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
399 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
400}
401
402#[inline]
407#[target_feature(enable = "sse2")]
408#[cfg_attr(test, assert_instr(psubsb))]
409#[stable(feature = "simd_x86", since = "1.27.0")]
410pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
411 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
412}
413
414#[inline]
419#[target_feature(enable = "sse2")]
420#[cfg_attr(test, assert_instr(psubsw))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
423 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
424}
425
426#[inline]
431#[target_feature(enable = "sse2")]
432#[cfg_attr(test, assert_instr(psubusb))]
433#[stable(feature = "simd_x86", since = "1.27.0")]
434pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
435 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
436}
437
438#[inline]
443#[target_feature(enable = "sse2")]
444#[cfg_attr(test, assert_instr(psubusw))]
445#[stable(feature = "simd_x86", since = "1.27.0")]
446pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
447 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
448}
449
450#[inline]
454#[target_feature(enable = "sse2")]
455#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
456#[rustc_legacy_const_generics(1)]
457#[stable(feature = "simd_x86", since = "1.27.0")]
458pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
459 static_assert_uimm_bits!(IMM8, 8);
460 unsafe { _mm_slli_si128_impl::<IMM8>(a) }
461}
462
463#[inline]
466#[target_feature(enable = "sse2")]
467unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
468 const fn mask(shift: i32, i: u32) -> u32 {
469 let shift = shift as u32 & 0xff;
470 if shift > 15 { i } else { 16 - shift + i }
471 }
472 transmute::<i8x16, _>(simd_shuffle!(
473 i8x16::ZERO,
474 a.as_i8x16(),
475 [
476 mask(IMM8, 0),
477 mask(IMM8, 1),
478 mask(IMM8, 2),
479 mask(IMM8, 3),
480 mask(IMM8, 4),
481 mask(IMM8, 5),
482 mask(IMM8, 6),
483 mask(IMM8, 7),
484 mask(IMM8, 8),
485 mask(IMM8, 9),
486 mask(IMM8, 10),
487 mask(IMM8, 11),
488 mask(IMM8, 12),
489 mask(IMM8, 13),
490 mask(IMM8, 14),
491 mask(IMM8, 15),
492 ],
493 ))
494}
495
496#[inline]
500#[target_feature(enable = "sse2")]
501#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
502#[rustc_legacy_const_generics(1)]
503#[stable(feature = "simd_x86", since = "1.27.0")]
504pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
505 unsafe {
506 static_assert_uimm_bits!(IMM8, 8);
507 _mm_slli_si128_impl::<IMM8>(a)
508 }
509}
510
511#[inline]
515#[target_feature(enable = "sse2")]
516#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
517#[rustc_legacy_const_generics(1)]
518#[stable(feature = "simd_x86", since = "1.27.0")]
519pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
520 unsafe {
521 static_assert_uimm_bits!(IMM8, 8);
522 _mm_srli_si128_impl::<IMM8>(a)
523 }
524}
525
526#[inline]
530#[target_feature(enable = "sse2")]
531#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
532#[rustc_legacy_const_generics(1)]
533#[stable(feature = "simd_x86", since = "1.27.0")]
534pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
535 static_assert_uimm_bits!(IMM8, 8);
536 unsafe {
537 if IMM8 >= 16 {
538 _mm_setzero_si128()
539 } else {
540 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
541 }
542 }
543}
544
545#[inline]
550#[target_feature(enable = "sse2")]
551#[cfg_attr(test, assert_instr(psllw))]
552#[stable(feature = "simd_x86", since = "1.27.0")]
553pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
554 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
555}
556
557#[inline]
561#[target_feature(enable = "sse2")]
562#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
563#[rustc_legacy_const_generics(1)]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
566 static_assert_uimm_bits!(IMM8, 8);
567 unsafe {
568 if IMM8 >= 32 {
569 _mm_setzero_si128()
570 } else {
571 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
572 }
573 }
574}
575
576#[inline]
581#[target_feature(enable = "sse2")]
582#[cfg_attr(test, assert_instr(pslld))]
583#[stable(feature = "simd_x86", since = "1.27.0")]
584pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
585 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
586}
587
588#[inline]
592#[target_feature(enable = "sse2")]
593#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
594#[rustc_legacy_const_generics(1)]
595#[stable(feature = "simd_x86", since = "1.27.0")]
596pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
597 static_assert_uimm_bits!(IMM8, 8);
598 unsafe {
599 if IMM8 >= 64 {
600 _mm_setzero_si128()
601 } else {
602 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
603 }
604 }
605}
606
607#[inline]
612#[target_feature(enable = "sse2")]
613#[cfg_attr(test, assert_instr(psllq))]
614#[stable(feature = "simd_x86", since = "1.27.0")]
615pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
616 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
617}
618
619#[inline]
624#[target_feature(enable = "sse2")]
625#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
626#[rustc_legacy_const_generics(1)]
627#[stable(feature = "simd_x86", since = "1.27.0")]
628pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
629 static_assert_uimm_bits!(IMM8, 8);
630 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
631}
632
633#[inline]
638#[target_feature(enable = "sse2")]
639#[cfg_attr(test, assert_instr(psraw))]
640#[stable(feature = "simd_x86", since = "1.27.0")]
641pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
642 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
643}
644
645#[inline]
650#[target_feature(enable = "sse2")]
651#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
652#[rustc_legacy_const_generics(1)]
653#[stable(feature = "simd_x86", since = "1.27.0")]
654pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
655 static_assert_uimm_bits!(IMM8, 8);
656 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
657}
658
659#[inline]
664#[target_feature(enable = "sse2")]
665#[cfg_attr(test, assert_instr(psrad))]
666#[stable(feature = "simd_x86", since = "1.27.0")]
667pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
668 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
669}
670
671#[inline]
675#[target_feature(enable = "sse2")]
676#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
677#[rustc_legacy_const_generics(1)]
678#[stable(feature = "simd_x86", since = "1.27.0")]
679pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
680 static_assert_uimm_bits!(IMM8, 8);
681 unsafe { _mm_srli_si128_impl::<IMM8>(a) }
682}
683
684#[inline]
687#[target_feature(enable = "sse2")]
688unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
689 const fn mask(shift: i32, i: u32) -> u32 {
690 if (shift as u32) > 15 {
691 i + 16
692 } else {
693 i + (shift as u32)
694 }
695 }
696 let x: i8x16 = simd_shuffle!(
697 a.as_i8x16(),
698 i8x16::ZERO,
699 [
700 mask(IMM8, 0),
701 mask(IMM8, 1),
702 mask(IMM8, 2),
703 mask(IMM8, 3),
704 mask(IMM8, 4),
705 mask(IMM8, 5),
706 mask(IMM8, 6),
707 mask(IMM8, 7),
708 mask(IMM8, 8),
709 mask(IMM8, 9),
710 mask(IMM8, 10),
711 mask(IMM8, 11),
712 mask(IMM8, 12),
713 mask(IMM8, 13),
714 mask(IMM8, 14),
715 mask(IMM8, 15),
716 ],
717 );
718 transmute(x)
719}
720
721#[inline]
726#[target_feature(enable = "sse2")]
727#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
728#[rustc_legacy_const_generics(1)]
729#[stable(feature = "simd_x86", since = "1.27.0")]
730pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
731 static_assert_uimm_bits!(IMM8, 8);
732 unsafe {
733 if IMM8 >= 16 {
734 _mm_setzero_si128()
735 } else {
736 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
737 }
738 }
739}
740
741#[inline]
746#[target_feature(enable = "sse2")]
747#[cfg_attr(test, assert_instr(psrlw))]
748#[stable(feature = "simd_x86", since = "1.27.0")]
749pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
750 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
751}
752
753#[inline]
758#[target_feature(enable = "sse2")]
759#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
760#[rustc_legacy_const_generics(1)]
761#[stable(feature = "simd_x86", since = "1.27.0")]
762pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
763 static_assert_uimm_bits!(IMM8, 8);
764 unsafe {
765 if IMM8 >= 32 {
766 _mm_setzero_si128()
767 } else {
768 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
769 }
770 }
771}
772
773#[inline]
778#[target_feature(enable = "sse2")]
779#[cfg_attr(test, assert_instr(psrld))]
780#[stable(feature = "simd_x86", since = "1.27.0")]
781pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
782 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
783}
784
785#[inline]
790#[target_feature(enable = "sse2")]
791#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
792#[rustc_legacy_const_generics(1)]
793#[stable(feature = "simd_x86", since = "1.27.0")]
794pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
795 static_assert_uimm_bits!(IMM8, 8);
796 unsafe {
797 if IMM8 >= 64 {
798 _mm_setzero_si128()
799 } else {
800 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
801 }
802 }
803}
804
805#[inline]
810#[target_feature(enable = "sse2")]
811#[cfg_attr(test, assert_instr(psrlq))]
812#[stable(feature = "simd_x86", since = "1.27.0")]
813pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
814 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
815}
816
817#[inline]
822#[target_feature(enable = "sse2")]
823#[cfg_attr(test, assert_instr(andps))]
824#[stable(feature = "simd_x86", since = "1.27.0")]
825pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
826 unsafe { simd_and(a, b) }
827}
828
829#[inline]
834#[target_feature(enable = "sse2")]
835#[cfg_attr(test, assert_instr(andnps))]
836#[stable(feature = "simd_x86", since = "1.27.0")]
837pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
838 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
839}
840
841#[inline]
846#[target_feature(enable = "sse2")]
847#[cfg_attr(test, assert_instr(orps))]
848#[stable(feature = "simd_x86", since = "1.27.0")]
849pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
850 unsafe { simd_or(a, b) }
851}
852
853#[inline]
858#[target_feature(enable = "sse2")]
859#[cfg_attr(test, assert_instr(xorps))]
860#[stable(feature = "simd_x86", since = "1.27.0")]
861pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
862 unsafe { simd_xor(a, b) }
863}
864
865#[inline]
869#[target_feature(enable = "sse2")]
870#[cfg_attr(test, assert_instr(pcmpeqb))]
871#[stable(feature = "simd_x86", since = "1.27.0")]
872pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
873 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
874}
875
876#[inline]
880#[target_feature(enable = "sse2")]
881#[cfg_attr(test, assert_instr(pcmpeqw))]
882#[stable(feature = "simd_x86", since = "1.27.0")]
883pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
884 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
885}
886
887#[inline]
891#[target_feature(enable = "sse2")]
892#[cfg_attr(test, assert_instr(pcmpeqd))]
893#[stable(feature = "simd_x86", since = "1.27.0")]
894pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
895 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
896}
897
898#[inline]
902#[target_feature(enable = "sse2")]
903#[cfg_attr(test, assert_instr(pcmpgtb))]
904#[stable(feature = "simd_x86", since = "1.27.0")]
905pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
906 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
907}
908
909#[inline]
913#[target_feature(enable = "sse2")]
914#[cfg_attr(test, assert_instr(pcmpgtw))]
915#[stable(feature = "simd_x86", since = "1.27.0")]
916pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
917 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
918}
919
920#[inline]
924#[target_feature(enable = "sse2")]
925#[cfg_attr(test, assert_instr(pcmpgtd))]
926#[stable(feature = "simd_x86", since = "1.27.0")]
927pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
928 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
929}
930
931#[inline]
935#[target_feature(enable = "sse2")]
936#[cfg_attr(test, assert_instr(pcmpgtb))]
937#[stable(feature = "simd_x86", since = "1.27.0")]
938pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
939 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
940}
941
942#[inline]
946#[target_feature(enable = "sse2")]
947#[cfg_attr(test, assert_instr(pcmpgtw))]
948#[stable(feature = "simd_x86", since = "1.27.0")]
949pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
950 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
951}
952
953#[inline]
957#[target_feature(enable = "sse2")]
958#[cfg_attr(test, assert_instr(pcmpgtd))]
959#[stable(feature = "simd_x86", since = "1.27.0")]
960pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
961 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
962}
963
964#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(cvtdq2pd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
973 unsafe {
974 let a = a.as_i32x4();
975 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
976 }
977}
978
979#[inline]
984#[target_feature(enable = "sse2")]
985#[cfg_attr(test, assert_instr(cvtsi2sd))]
986#[stable(feature = "simd_x86", since = "1.27.0")]
987pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
988 unsafe { simd_insert!(a, 0, b as f64) }
989}
990
991#[inline]
996#[target_feature(enable = "sse2")]
997#[cfg_attr(test, assert_instr(cvtdq2ps))]
998#[stable(feature = "simd_x86", since = "1.27.0")]
999pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1000 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1001}
1002
1003#[inline]
1008#[target_feature(enable = "sse2")]
1009#[cfg_attr(test, assert_instr(cvtps2dq))]
1010#[stable(feature = "simd_x86", since = "1.27.0")]
1011pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1012 unsafe { transmute(cvtps2dq(a)) }
1013}
1014
1015#[inline]
1020#[target_feature(enable = "sse2")]
1021#[stable(feature = "simd_x86", since = "1.27.0")]
1022pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1023 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1024}
1025
1026#[inline]
1030#[target_feature(enable = "sse2")]
1031#[stable(feature = "simd_x86", since = "1.27.0")]
1032pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1033 unsafe { simd_extract!(a.as_i32x4(), 0) }
1034}
1035
1036#[inline]
1041#[target_feature(enable = "sse2")]
1042#[stable(feature = "simd_x86", since = "1.27.0")]
1044pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1045 unsafe { transmute(i64x2::new(e0, e1)) }
1046}
1047
1048#[inline]
1052#[target_feature(enable = "sse2")]
1053#[stable(feature = "simd_x86", since = "1.27.0")]
1055pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1056 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1057}
1058
1059#[inline]
1063#[target_feature(enable = "sse2")]
1064#[stable(feature = "simd_x86", since = "1.27.0")]
1066pub fn _mm_set_epi16(
1067 e7: i16,
1068 e6: i16,
1069 e5: i16,
1070 e4: i16,
1071 e3: i16,
1072 e2: i16,
1073 e1: i16,
1074 e0: i16,
1075) -> __m128i {
1076 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1077}
1078
1079#[inline]
1083#[target_feature(enable = "sse2")]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1086pub fn _mm_set_epi8(
1087 e15: i8,
1088 e14: i8,
1089 e13: i8,
1090 e12: i8,
1091 e11: i8,
1092 e10: i8,
1093 e9: i8,
1094 e8: i8,
1095 e7: i8,
1096 e6: i8,
1097 e5: i8,
1098 e4: i8,
1099 e3: i8,
1100 e2: i8,
1101 e1: i8,
1102 e0: i8,
1103) -> __m128i {
1104 unsafe {
1105 #[rustfmt::skip]
1106 transmute(i8x16::new(
1107 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1108 ))
1109 }
1110}
1111
1112#[inline]
1116#[target_feature(enable = "sse2")]
1117#[stable(feature = "simd_x86", since = "1.27.0")]
1119pub fn _mm_set1_epi64x(a: i64) -> __m128i {
1120 _mm_set_epi64x(a, a)
1121}
1122
1123#[inline]
1127#[target_feature(enable = "sse2")]
1128#[stable(feature = "simd_x86", since = "1.27.0")]
1130pub fn _mm_set1_epi32(a: i32) -> __m128i {
1131 _mm_set_epi32(a, a, a, a)
1132}
1133
1134#[inline]
1138#[target_feature(enable = "sse2")]
1139#[stable(feature = "simd_x86", since = "1.27.0")]
1141pub fn _mm_set1_epi16(a: i16) -> __m128i {
1142 _mm_set_epi16(a, a, a, a, a, a, a, a)
1143}
1144
1145#[inline]
1149#[target_feature(enable = "sse2")]
1150#[stable(feature = "simd_x86", since = "1.27.0")]
1152pub fn _mm_set1_epi8(a: i8) -> __m128i {
1153 _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
1154}
1155
1156#[inline]
1160#[target_feature(enable = "sse2")]
1161#[stable(feature = "simd_x86", since = "1.27.0")]
1163pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1164 _mm_set_epi32(e0, e1, e2, e3)
1165}
1166
1167#[inline]
1171#[target_feature(enable = "sse2")]
1172#[stable(feature = "simd_x86", since = "1.27.0")]
1174pub fn _mm_setr_epi16(
1175 e7: i16,
1176 e6: i16,
1177 e5: i16,
1178 e4: i16,
1179 e3: i16,
1180 e2: i16,
1181 e1: i16,
1182 e0: i16,
1183) -> __m128i {
1184 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1185}
1186
1187#[inline]
1191#[target_feature(enable = "sse2")]
1192#[stable(feature = "simd_x86", since = "1.27.0")]
1194pub fn _mm_setr_epi8(
1195 e15: i8,
1196 e14: i8,
1197 e13: i8,
1198 e12: i8,
1199 e11: i8,
1200 e10: i8,
1201 e9: i8,
1202 e8: i8,
1203 e7: i8,
1204 e6: i8,
1205 e5: i8,
1206 e4: i8,
1207 e3: i8,
1208 e2: i8,
1209 e1: i8,
1210 e0: i8,
1211) -> __m128i {
1212 #[rustfmt::skip]
1213 _mm_set_epi8(
1214 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1215 )
1216}
1217
1218#[inline]
1222#[target_feature(enable = "sse2")]
1223#[cfg_attr(test, assert_instr(xorps))]
1224#[stable(feature = "simd_x86", since = "1.27.0")]
1225pub fn _mm_setzero_si128() -> __m128i {
1226 const { unsafe { mem::zeroed() } }
1227}
1228
1229#[inline]
1233#[target_feature(enable = "sse2")]
1234#[stable(feature = "simd_x86", since = "1.27.0")]
1235pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1236 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1237}
1238
1239#[inline]
1245#[target_feature(enable = "sse2")]
1246#[cfg_attr(test, assert_instr(movaps))]
1247#[stable(feature = "simd_x86", since = "1.27.0")]
1248pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1249 *mem_addr
1250}
1251
1252#[inline]
1258#[target_feature(enable = "sse2")]
1259#[cfg_attr(test, assert_instr(movups))]
1260#[stable(feature = "simd_x86", since = "1.27.0")]
1261pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1262 let mut dst: __m128i = _mm_undefined_si128();
1263 ptr::copy_nonoverlapping(
1264 mem_addr as *const u8,
1265 ptr::addr_of_mut!(dst) as *mut u8,
1266 mem::size_of::<__m128i>(),
1267 );
1268 dst
1269}
1270
1271#[inline]
1282#[target_feature(enable = "sse2")]
1283#[cfg_attr(test, assert_instr(maskmovdqu))]
1284#[stable(feature = "simd_x86", since = "1.27.0")]
1285pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1286 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1287}
1288
1289#[inline]
1295#[target_feature(enable = "sse2")]
1296#[cfg_attr(test, assert_instr(movaps))]
1297#[stable(feature = "simd_x86", since = "1.27.0")]
1298pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1299 *mem_addr = a;
1300}
1301
1302#[inline]
1308#[target_feature(enable = "sse2")]
1309#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1311pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1312 mem_addr.write_unaligned(a);
1313}
1314
1315#[inline]
1321#[target_feature(enable = "sse2")]
1322#[stable(feature = "simd_x86", since = "1.27.0")]
1323pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1324 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1325}
1326
1327#[inline]
1342#[target_feature(enable = "sse2")]
1343#[cfg_attr(test, assert_instr(movntdq))]
1344#[stable(feature = "simd_x86", since = "1.27.0")]
1345pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
1346 crate::arch::asm!(
1347 vps!("movntdq", ",{a}"),
1348 p = in(reg) mem_addr,
1349 a = in(xmm_reg) a,
1350 options(nostack, preserves_flags),
1351 );
1352}
1353
1354#[inline]
1369#[target_feature(enable = "sse2")]
1370#[cfg_attr(test, assert_instr(movnti))]
1371#[stable(feature = "simd_x86", since = "1.27.0")]
1372pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1373 crate::arch::asm!(
1374 vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1376 a = in(reg) a,
1377 options(nostack, preserves_flags),
1378 );
1379}
1380
1381#[inline]
1386#[target_feature(enable = "sse2")]
1387#[cfg_attr(
1389 all(test, not(target_env = "msvc"), target_arch = "x86_64"),
1390 assert_instr(movq)
1391)]
1392#[stable(feature = "simd_x86", since = "1.27.0")]
1393pub fn _mm_move_epi64(a: __m128i) -> __m128i {
1394 unsafe {
1395 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1396 transmute(r)
1397 }
1398}
1399
1400#[inline]
1405#[target_feature(enable = "sse2")]
1406#[cfg_attr(test, assert_instr(packsswb))]
1407#[stable(feature = "simd_x86", since = "1.27.0")]
1408pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1409 unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1410}
1411
1412#[inline]
1417#[target_feature(enable = "sse2")]
1418#[cfg_attr(test, assert_instr(packssdw))]
1419#[stable(feature = "simd_x86", since = "1.27.0")]
1420pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1421 unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1422}
1423
1424#[inline]
1429#[target_feature(enable = "sse2")]
1430#[cfg_attr(test, assert_instr(packuswb))]
1431#[stable(feature = "simd_x86", since = "1.27.0")]
1432pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1433 unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1434}
1435
1436#[inline]
1440#[target_feature(enable = "sse2")]
1441#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1442#[rustc_legacy_const_generics(1)]
1443#[stable(feature = "simd_x86", since = "1.27.0")]
1444pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1445 static_assert_uimm_bits!(IMM8, 3);
1446 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1447}
1448
1449#[inline]
1453#[target_feature(enable = "sse2")]
1454#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1455#[rustc_legacy_const_generics(2)]
1456#[stable(feature = "simd_x86", since = "1.27.0")]
1457pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1458 static_assert_uimm_bits!(IMM8, 3);
1459 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1460}
1461
1462#[inline]
1466#[target_feature(enable = "sse2")]
1467#[cfg_attr(test, assert_instr(pmovmskb))]
1468#[stable(feature = "simd_x86", since = "1.27.0")]
1469pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
1470 unsafe {
1471 let z = i8x16::ZERO;
1472 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1473 simd_bitmask::<_, u16>(m) as u32 as i32
1474 }
1475}
1476
1477#[inline]
1481#[target_feature(enable = "sse2")]
1482#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1483#[rustc_legacy_const_generics(1)]
1484#[stable(feature = "simd_x86", since = "1.27.0")]
1485pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1486 static_assert_uimm_bits!(IMM8, 8);
1487 unsafe {
1488 let a = a.as_i32x4();
1489 let x: i32x4 = simd_shuffle!(
1490 a,
1491 a,
1492 [
1493 IMM8 as u32 & 0b11,
1494 (IMM8 as u32 >> 2) & 0b11,
1495 (IMM8 as u32 >> 4) & 0b11,
1496 (IMM8 as u32 >> 6) & 0b11,
1497 ],
1498 );
1499 transmute(x)
1500 }
1501}
1502
1503#[inline]
1511#[target_feature(enable = "sse2")]
1512#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1513#[rustc_legacy_const_generics(1)]
1514#[stable(feature = "simd_x86", since = "1.27.0")]
1515pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1516 static_assert_uimm_bits!(IMM8, 8);
1517 unsafe {
1518 let a = a.as_i16x8();
1519 let x: i16x8 = simd_shuffle!(
1520 a,
1521 a,
1522 [
1523 0,
1524 1,
1525 2,
1526 3,
1527 (IMM8 as u32 & 0b11) + 4,
1528 ((IMM8 as u32 >> 2) & 0b11) + 4,
1529 ((IMM8 as u32 >> 4) & 0b11) + 4,
1530 ((IMM8 as u32 >> 6) & 0b11) + 4,
1531 ],
1532 );
1533 transmute(x)
1534 }
1535}
1536
1537#[inline]
1545#[target_feature(enable = "sse2")]
1546#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1547#[rustc_legacy_const_generics(1)]
1548#[stable(feature = "simd_x86", since = "1.27.0")]
1549pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1550 static_assert_uimm_bits!(IMM8, 8);
1551 unsafe {
1552 let a = a.as_i16x8();
1553 let x: i16x8 = simd_shuffle!(
1554 a,
1555 a,
1556 [
1557 IMM8 as u32 & 0b11,
1558 (IMM8 as u32 >> 2) & 0b11,
1559 (IMM8 as u32 >> 4) & 0b11,
1560 (IMM8 as u32 >> 6) & 0b11,
1561 4,
1562 5,
1563 6,
1564 7,
1565 ],
1566 );
1567 transmute(x)
1568 }
1569}
1570
1571#[inline]
1575#[target_feature(enable = "sse2")]
1576#[cfg_attr(test, assert_instr(punpckhbw))]
1577#[stable(feature = "simd_x86", since = "1.27.0")]
1578pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1579 unsafe {
1580 transmute::<i8x16, _>(simd_shuffle!(
1581 a.as_i8x16(),
1582 b.as_i8x16(),
1583 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1584 ))
1585 }
1586}
1587
1588#[inline]
1592#[target_feature(enable = "sse2")]
1593#[cfg_attr(test, assert_instr(punpckhwd))]
1594#[stable(feature = "simd_x86", since = "1.27.0")]
1595pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1596 unsafe {
1597 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1598 transmute::<i16x8, _>(x)
1599 }
1600}
1601
1602#[inline]
1606#[target_feature(enable = "sse2")]
1607#[cfg_attr(test, assert_instr(unpckhps))]
1608#[stable(feature = "simd_x86", since = "1.27.0")]
1609pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1610 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1611}
1612
1613#[inline]
1617#[target_feature(enable = "sse2")]
1618#[cfg_attr(test, assert_instr(unpckhpd))]
1619#[stable(feature = "simd_x86", since = "1.27.0")]
1620pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1621 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1622}
1623
1624#[inline]
1628#[target_feature(enable = "sse2")]
1629#[cfg_attr(test, assert_instr(punpcklbw))]
1630#[stable(feature = "simd_x86", since = "1.27.0")]
1631pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1632 unsafe {
1633 transmute::<i8x16, _>(simd_shuffle!(
1634 a.as_i8x16(),
1635 b.as_i8x16(),
1636 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1637 ))
1638 }
1639}
1640
1641#[inline]
1645#[target_feature(enable = "sse2")]
1646#[cfg_attr(test, assert_instr(punpcklwd))]
1647#[stable(feature = "simd_x86", since = "1.27.0")]
1648pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1649 unsafe {
1650 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1651 transmute::<i16x8, _>(x)
1652 }
1653}
1654
1655#[inline]
1659#[target_feature(enable = "sse2")]
1660#[cfg_attr(test, assert_instr(unpcklps))]
1661#[stable(feature = "simd_x86", since = "1.27.0")]
1662pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1663 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1664}
1665
1666#[inline]
1670#[target_feature(enable = "sse2")]
1671#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
1672#[stable(feature = "simd_x86", since = "1.27.0")]
1673pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1674 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1675}
1676
1677#[inline]
1682#[target_feature(enable = "sse2")]
1683#[cfg_attr(test, assert_instr(addsd))]
1684#[stable(feature = "simd_x86", since = "1.27.0")]
1685pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1686 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1687}
1688
1689#[inline]
1694#[target_feature(enable = "sse2")]
1695#[cfg_attr(test, assert_instr(addpd))]
1696#[stable(feature = "simd_x86", since = "1.27.0")]
1697pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1698 unsafe { simd_add(a, b) }
1699}
1700
1701#[inline]
1706#[target_feature(enable = "sse2")]
1707#[cfg_attr(test, assert_instr(divsd))]
1708#[stable(feature = "simd_x86", since = "1.27.0")]
1709pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1710 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1711}
1712
1713#[inline]
1718#[target_feature(enable = "sse2")]
1719#[cfg_attr(test, assert_instr(divpd))]
1720#[stable(feature = "simd_x86", since = "1.27.0")]
1721pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1722 unsafe { simd_div(a, b) }
1723}
1724
1725#[inline]
1730#[target_feature(enable = "sse2")]
1731#[cfg_attr(test, assert_instr(maxsd))]
1732#[stable(feature = "simd_x86", since = "1.27.0")]
1733pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1734 unsafe { maxsd(a, b) }
1735}
1736
1737#[inline]
1742#[target_feature(enable = "sse2")]
1743#[cfg_attr(test, assert_instr(maxpd))]
1744#[stable(feature = "simd_x86", since = "1.27.0")]
1745pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1746 unsafe { maxpd(a, b) }
1747}
1748
1749#[inline]
1754#[target_feature(enable = "sse2")]
1755#[cfg_attr(test, assert_instr(minsd))]
1756#[stable(feature = "simd_x86", since = "1.27.0")]
1757pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1758 unsafe { minsd(a, b) }
1759}
1760
1761#[inline]
1766#[target_feature(enable = "sse2")]
1767#[cfg_attr(test, assert_instr(minpd))]
1768#[stable(feature = "simd_x86", since = "1.27.0")]
1769pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1770 unsafe { minpd(a, b) }
1771}
1772
1773#[inline]
1778#[target_feature(enable = "sse2")]
1779#[cfg_attr(test, assert_instr(mulsd))]
1780#[stable(feature = "simd_x86", since = "1.27.0")]
1781pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1782 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1783}
1784
1785#[inline]
1790#[target_feature(enable = "sse2")]
1791#[cfg_attr(test, assert_instr(mulpd))]
1792#[stable(feature = "simd_x86", since = "1.27.0")]
1793pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1794 unsafe { simd_mul(a, b) }
1795}
1796
1797#[inline]
1802#[target_feature(enable = "sse2")]
1803#[cfg_attr(test, assert_instr(sqrtsd))]
1804#[stable(feature = "simd_x86", since = "1.27.0")]
1805pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1806 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1807}
1808
1809#[inline]
1813#[target_feature(enable = "sse2")]
1814#[cfg_attr(test, assert_instr(sqrtpd))]
1815#[stable(feature = "simd_x86", since = "1.27.0")]
1816pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1817 unsafe { simd_fsqrt(a) }
1818}
1819
1820#[inline]
1825#[target_feature(enable = "sse2")]
1826#[cfg_attr(test, assert_instr(subsd))]
1827#[stable(feature = "simd_x86", since = "1.27.0")]
1828pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1829 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1830}
1831
1832#[inline]
1837#[target_feature(enable = "sse2")]
1838#[cfg_attr(test, assert_instr(subpd))]
1839#[stable(feature = "simd_x86", since = "1.27.0")]
1840pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1841 unsafe { simd_sub(a, b) }
1842}
1843
1844#[inline]
1849#[target_feature(enable = "sse2")]
1850#[cfg_attr(test, assert_instr(andps))]
1851#[stable(feature = "simd_x86", since = "1.27.0")]
1852pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1853 unsafe {
1854 let a: __m128i = transmute(a);
1855 let b: __m128i = transmute(b);
1856 transmute(_mm_and_si128(a, b))
1857 }
1858}
1859
1860#[inline]
1864#[target_feature(enable = "sse2")]
1865#[cfg_attr(test, assert_instr(andnps))]
1866#[stable(feature = "simd_x86", since = "1.27.0")]
1867pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1868 unsafe {
1869 let a: __m128i = transmute(a);
1870 let b: __m128i = transmute(b);
1871 transmute(_mm_andnot_si128(a, b))
1872 }
1873}
1874
1875#[inline]
1879#[target_feature(enable = "sse2")]
1880#[cfg_attr(test, assert_instr(orps))]
1881#[stable(feature = "simd_x86", since = "1.27.0")]
1882pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1883 unsafe {
1884 let a: __m128i = transmute(a);
1885 let b: __m128i = transmute(b);
1886 transmute(_mm_or_si128(a, b))
1887 }
1888}
1889
1890#[inline]
1894#[target_feature(enable = "sse2")]
1895#[cfg_attr(test, assert_instr(xorps))]
1896#[stable(feature = "simd_x86", since = "1.27.0")]
1897pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1898 unsafe {
1899 let a: __m128i = transmute(a);
1900 let b: __m128i = transmute(b);
1901 transmute(_mm_xor_si128(a, b))
1902 }
1903}
1904
1905#[inline]
1910#[target_feature(enable = "sse2")]
1911#[cfg_attr(test, assert_instr(cmpeqsd))]
1912#[stable(feature = "simd_x86", since = "1.27.0")]
1913pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
1914 unsafe { cmpsd(a, b, 0) }
1915}
1916
1917#[inline]
1922#[target_feature(enable = "sse2")]
1923#[cfg_attr(test, assert_instr(cmpltsd))]
1924#[stable(feature = "simd_x86", since = "1.27.0")]
1925pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
1926 unsafe { cmpsd(a, b, 1) }
1927}
1928
1929#[inline]
1934#[target_feature(enable = "sse2")]
1935#[cfg_attr(test, assert_instr(cmplesd))]
1936#[stable(feature = "simd_x86", since = "1.27.0")]
1937pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
1938 unsafe { cmpsd(a, b, 2) }
1939}
1940
1941#[inline]
1946#[target_feature(enable = "sse2")]
1947#[cfg_attr(test, assert_instr(cmpltsd))]
1948#[stable(feature = "simd_x86", since = "1.27.0")]
1949pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1950 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1951}
1952
1953#[inline]
1958#[target_feature(enable = "sse2")]
1959#[cfg_attr(test, assert_instr(cmplesd))]
1960#[stable(feature = "simd_x86", since = "1.27.0")]
1961pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1962 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1963}
1964
1965#[inline]
1972#[target_feature(enable = "sse2")]
1973#[cfg_attr(test, assert_instr(cmpordsd))]
1974#[stable(feature = "simd_x86", since = "1.27.0")]
1975pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
1976 unsafe { cmpsd(a, b, 7) }
1977}
1978
1979#[inline]
1985#[target_feature(enable = "sse2")]
1986#[cfg_attr(test, assert_instr(cmpunordsd))]
1987#[stable(feature = "simd_x86", since = "1.27.0")]
1988pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
1989 unsafe { cmpsd(a, b, 3) }
1990}
1991
1992#[inline]
1997#[target_feature(enable = "sse2")]
1998#[cfg_attr(test, assert_instr(cmpneqsd))]
1999#[stable(feature = "simd_x86", since = "1.27.0")]
2000pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2001 unsafe { cmpsd(a, b, 4) }
2002}
2003
2004#[inline]
2009#[target_feature(enable = "sse2")]
2010#[cfg_attr(test, assert_instr(cmpnltsd))]
2011#[stable(feature = "simd_x86", since = "1.27.0")]
2012pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2013 unsafe { cmpsd(a, b, 5) }
2014}
2015
2016#[inline]
2021#[target_feature(enable = "sse2")]
2022#[cfg_attr(test, assert_instr(cmpnlesd))]
2023#[stable(feature = "simd_x86", since = "1.27.0")]
2024pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2025 unsafe { cmpsd(a, b, 6) }
2026}
2027
2028#[inline]
2033#[target_feature(enable = "sse2")]
2034#[cfg_attr(test, assert_instr(cmpnltsd))]
2035#[stable(feature = "simd_x86", since = "1.27.0")]
2036pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2037 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2038}
2039
2040#[inline]
2045#[target_feature(enable = "sse2")]
2046#[cfg_attr(test, assert_instr(cmpnlesd))]
2047#[stable(feature = "simd_x86", since = "1.27.0")]
2048pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2049 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2050}
2051
2052#[inline]
2056#[target_feature(enable = "sse2")]
2057#[cfg_attr(test, assert_instr(cmpeqpd))]
2058#[stable(feature = "simd_x86", since = "1.27.0")]
2059pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2060 unsafe { cmppd(a, b, 0) }
2061}
2062
2063#[inline]
2067#[target_feature(enable = "sse2")]
2068#[cfg_attr(test, assert_instr(cmpltpd))]
2069#[stable(feature = "simd_x86", since = "1.27.0")]
2070pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2071 unsafe { cmppd(a, b, 1) }
2072}
2073
2074#[inline]
2078#[target_feature(enable = "sse2")]
2079#[cfg_attr(test, assert_instr(cmplepd))]
2080#[stable(feature = "simd_x86", since = "1.27.0")]
2081pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2082 unsafe { cmppd(a, b, 2) }
2083}
2084
2085#[inline]
2089#[target_feature(enable = "sse2")]
2090#[cfg_attr(test, assert_instr(cmpltpd))]
2091#[stable(feature = "simd_x86", since = "1.27.0")]
2092pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2093 _mm_cmplt_pd(b, a)
2094}
2095
2096#[inline]
2100#[target_feature(enable = "sse2")]
2101#[cfg_attr(test, assert_instr(cmplepd))]
2102#[stable(feature = "simd_x86", since = "1.27.0")]
2103pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2104 _mm_cmple_pd(b, a)
2105}
2106
2107#[inline]
2111#[target_feature(enable = "sse2")]
2112#[cfg_attr(test, assert_instr(cmpordpd))]
2113#[stable(feature = "simd_x86", since = "1.27.0")]
2114pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2115 unsafe { cmppd(a, b, 7) }
2116}
2117
2118#[inline]
2122#[target_feature(enable = "sse2")]
2123#[cfg_attr(test, assert_instr(cmpunordpd))]
2124#[stable(feature = "simd_x86", since = "1.27.0")]
2125pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2126 unsafe { cmppd(a, b, 3) }
2127}
2128
2129#[inline]
2133#[target_feature(enable = "sse2")]
2134#[cfg_attr(test, assert_instr(cmpneqpd))]
2135#[stable(feature = "simd_x86", since = "1.27.0")]
2136pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2137 unsafe { cmppd(a, b, 4) }
2138}
2139
2140#[inline]
2144#[target_feature(enable = "sse2")]
2145#[cfg_attr(test, assert_instr(cmpnltpd))]
2146#[stable(feature = "simd_x86", since = "1.27.0")]
2147pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2148 unsafe { cmppd(a, b, 5) }
2149}
2150
2151#[inline]
2155#[target_feature(enable = "sse2")]
2156#[cfg_attr(test, assert_instr(cmpnlepd))]
2157#[stable(feature = "simd_x86", since = "1.27.0")]
2158pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2159 unsafe { cmppd(a, b, 6) }
2160}
2161
2162#[inline]
2166#[target_feature(enable = "sse2")]
2167#[cfg_attr(test, assert_instr(cmpnltpd))]
2168#[stable(feature = "simd_x86", since = "1.27.0")]
2169pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2170 _mm_cmpnlt_pd(b, a)
2171}
2172
2173#[inline]
2178#[target_feature(enable = "sse2")]
2179#[cfg_attr(test, assert_instr(cmpnlepd))]
2180#[stable(feature = "simd_x86", since = "1.27.0")]
2181pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2182 _mm_cmpnle_pd(b, a)
2183}
2184
2185#[inline]
2189#[target_feature(enable = "sse2")]
2190#[cfg_attr(test, assert_instr(comisd))]
2191#[stable(feature = "simd_x86", since = "1.27.0")]
2192pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2193 unsafe { comieqsd(a, b) }
2194}
2195
2196#[inline]
2200#[target_feature(enable = "sse2")]
2201#[cfg_attr(test, assert_instr(comisd))]
2202#[stable(feature = "simd_x86", since = "1.27.0")]
2203pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2204 unsafe { comiltsd(a, b) }
2205}
2206
2207#[inline]
2211#[target_feature(enable = "sse2")]
2212#[cfg_attr(test, assert_instr(comisd))]
2213#[stable(feature = "simd_x86", since = "1.27.0")]
2214pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2215 unsafe { comilesd(a, b) }
2216}
2217
2218#[inline]
2222#[target_feature(enable = "sse2")]
2223#[cfg_attr(test, assert_instr(comisd))]
2224#[stable(feature = "simd_x86", since = "1.27.0")]
2225pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2226 unsafe { comigtsd(a, b) }
2227}
2228
2229#[inline]
2233#[target_feature(enable = "sse2")]
2234#[cfg_attr(test, assert_instr(comisd))]
2235#[stable(feature = "simd_x86", since = "1.27.0")]
2236pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2237 unsafe { comigesd(a, b) }
2238}
2239
2240#[inline]
2244#[target_feature(enable = "sse2")]
2245#[cfg_attr(test, assert_instr(comisd))]
2246#[stable(feature = "simd_x86", since = "1.27.0")]
2247pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2248 unsafe { comineqsd(a, b) }
2249}
2250
2251#[inline]
2255#[target_feature(enable = "sse2")]
2256#[cfg_attr(test, assert_instr(ucomisd))]
2257#[stable(feature = "simd_x86", since = "1.27.0")]
2258pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2259 unsafe { ucomieqsd(a, b) }
2260}
2261
2262#[inline]
2266#[target_feature(enable = "sse2")]
2267#[cfg_attr(test, assert_instr(ucomisd))]
2268#[stable(feature = "simd_x86", since = "1.27.0")]
2269pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2270 unsafe { ucomiltsd(a, b) }
2271}
2272
2273#[inline]
2277#[target_feature(enable = "sse2")]
2278#[cfg_attr(test, assert_instr(ucomisd))]
2279#[stable(feature = "simd_x86", since = "1.27.0")]
2280pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2281 unsafe { ucomilesd(a, b) }
2282}
2283
2284#[inline]
2288#[target_feature(enable = "sse2")]
2289#[cfg_attr(test, assert_instr(ucomisd))]
2290#[stable(feature = "simd_x86", since = "1.27.0")]
2291pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2292 unsafe { ucomigtsd(a, b) }
2293}
2294
2295#[inline]
2299#[target_feature(enable = "sse2")]
2300#[cfg_attr(test, assert_instr(ucomisd))]
2301#[stable(feature = "simd_x86", since = "1.27.0")]
2302pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2303 unsafe { ucomigesd(a, b) }
2304}
2305
2306#[inline]
2310#[target_feature(enable = "sse2")]
2311#[cfg_attr(test, assert_instr(ucomisd))]
2312#[stable(feature = "simd_x86", since = "1.27.0")]
2313pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2314 unsafe { ucomineqsd(a, b) }
2315}
2316
2317#[inline]
2322#[target_feature(enable = "sse2")]
2323#[cfg_attr(test, assert_instr(cvtpd2ps))]
2324#[stable(feature = "simd_x86", since = "1.27.0")]
2325pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2326 unsafe {
2327 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2328 let zero = f32x2::ZERO;
2329 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2330 }
2331}
2332
2333#[inline]
2339#[target_feature(enable = "sse2")]
2340#[cfg_attr(test, assert_instr(cvtps2pd))]
2341#[stable(feature = "simd_x86", since = "1.27.0")]
2342pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
2343 unsafe {
2344 let a = a.as_f32x4();
2345 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2346 }
2347}
2348
2349#[inline]
2354#[target_feature(enable = "sse2")]
2355#[cfg_attr(test, assert_instr(cvtpd2dq))]
2356#[stable(feature = "simd_x86", since = "1.27.0")]
2357pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2358 unsafe { transmute(cvtpd2dq(a)) }
2359}
2360
2361#[inline]
2366#[target_feature(enable = "sse2")]
2367#[cfg_attr(test, assert_instr(cvtsd2si))]
2368#[stable(feature = "simd_x86", since = "1.27.0")]
2369pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2370 unsafe { cvtsd2si(a) }
2371}
2372
2373#[inline]
2380#[target_feature(enable = "sse2")]
2381#[cfg_attr(test, assert_instr(cvtsd2ss))]
2382#[stable(feature = "simd_x86", since = "1.27.0")]
2383pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2384 unsafe { cvtsd2ss(a, b) }
2385}
2386
2387#[inline]
2391#[target_feature(enable = "sse2")]
2392#[stable(feature = "simd_x86", since = "1.27.0")]
2393pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2394 unsafe { simd_extract!(a, 0) }
2395}
2396
2397#[inline]
2404#[target_feature(enable = "sse2")]
2405#[cfg_attr(test, assert_instr(cvtss2sd))]
2406#[stable(feature = "simd_x86", since = "1.27.0")]
2407pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2408 unsafe { cvtss2sd(a, b) }
2409}
2410
2411#[inline]
2416#[target_feature(enable = "sse2")]
2417#[cfg_attr(test, assert_instr(cvttpd2dq))]
2418#[stable(feature = "simd_x86", since = "1.27.0")]
2419pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2420 unsafe { transmute(cvttpd2dq(a)) }
2421}
2422
2423#[inline]
2428#[target_feature(enable = "sse2")]
2429#[cfg_attr(test, assert_instr(cvttsd2si))]
2430#[stable(feature = "simd_x86", since = "1.27.0")]
2431pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2432 unsafe { cvttsd2si(a) }
2433}
2434
2435#[inline]
2440#[target_feature(enable = "sse2")]
2441#[cfg_attr(test, assert_instr(cvttps2dq))]
2442#[stable(feature = "simd_x86", since = "1.27.0")]
2443pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2444 unsafe { transmute(cvttps2dq(a)) }
2445}
2446
2447#[inline]
2452#[target_feature(enable = "sse2")]
2453#[stable(feature = "simd_x86", since = "1.27.0")]
2454pub fn _mm_set_sd(a: f64) -> __m128d {
2455 _mm_set_pd(0.0, a)
2456}
2457
2458#[inline]
2463#[target_feature(enable = "sse2")]
2464#[stable(feature = "simd_x86", since = "1.27.0")]
2465pub fn _mm_set1_pd(a: f64) -> __m128d {
2466 _mm_set_pd(a, a)
2467}
2468
2469#[inline]
2474#[target_feature(enable = "sse2")]
2475#[stable(feature = "simd_x86", since = "1.27.0")]
2476pub fn _mm_set_pd1(a: f64) -> __m128d {
2477 _mm_set_pd(a, a)
2478}
2479
2480#[inline]
2485#[target_feature(enable = "sse2")]
2486#[stable(feature = "simd_x86", since = "1.27.0")]
2487pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2488 __m128d([b, a])
2489}
2490
2491#[inline]
2496#[target_feature(enable = "sse2")]
2497#[stable(feature = "simd_x86", since = "1.27.0")]
2498pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2499 _mm_set_pd(b, a)
2500}
2501
2502#[inline]
2507#[target_feature(enable = "sse2")]
2508#[cfg_attr(test, assert_instr(xorp))]
2509#[stable(feature = "simd_x86", since = "1.27.0")]
2510pub fn _mm_setzero_pd() -> __m128d {
2511 const { unsafe { mem::zeroed() } }
2512}
2513
2514#[inline]
2521#[target_feature(enable = "sse2")]
2522#[cfg_attr(test, assert_instr(movmskpd))]
2523#[stable(feature = "simd_x86", since = "1.27.0")]
2524pub fn _mm_movemask_pd(a: __m128d) -> i32 {
2525 unsafe {
2528 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2529 simd_bitmask::<i64x2, u8>(mask).into()
2530 }
2531}
2532
2533#[inline]
2540#[target_feature(enable = "sse2")]
2541#[cfg_attr(test, assert_instr(movaps))]
2542#[stable(feature = "simd_x86", since = "1.27.0")]
2543#[allow(clippy::cast_ptr_alignment)]
2544pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2545 *(mem_addr as *const __m128d)
2546}
2547
2548#[inline]
2553#[target_feature(enable = "sse2")]
2554#[cfg_attr(test, assert_instr(movsd))]
2555#[stable(feature = "simd_x86", since = "1.27.0")]
2556pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2557 _mm_setr_pd(*mem_addr, 0.)
2558}
2559
2560#[inline]
2566#[target_feature(enable = "sse2")]
2567#[cfg_attr(test, assert_instr(movhps))]
2568#[stable(feature = "simd_x86", since = "1.27.0")]
2569pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2570 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2571}
2572
2573#[inline]
2579#[target_feature(enable = "sse2")]
2580#[cfg_attr(test, assert_instr(movlps))]
2581#[stable(feature = "simd_x86", since = "1.27.0")]
2582pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2583 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2584}
2585
2586#[inline]
2602#[target_feature(enable = "sse2")]
2603#[cfg_attr(test, assert_instr(movntpd))]
2604#[stable(feature = "simd_x86", since = "1.27.0")]
2605#[allow(clippy::cast_ptr_alignment)]
2606pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
2607 crate::arch::asm!(
2608 vps!("movntpd", ",{a}"),
2609 p = in(reg) mem_addr,
2610 a = in(xmm_reg) a,
2611 options(nostack, preserves_flags),
2612 );
2613}
2614
2615#[inline]
2620#[target_feature(enable = "sse2")]
2621#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
2622#[stable(feature = "simd_x86", since = "1.27.0")]
2623pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2624 *mem_addr = simd_extract!(a, 0)
2625}
2626
2627#[inline]
2633#[target_feature(enable = "sse2")]
2634#[cfg_attr(test, assert_instr(movaps))]
2635#[stable(feature = "simd_x86", since = "1.27.0")]
2636#[allow(clippy::cast_ptr_alignment)]
2637pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2638 *(mem_addr as *mut __m128d) = a;
2639}
2640
2641#[inline]
2647#[target_feature(enable = "sse2")]
2648#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2650pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2651 mem_addr.cast::<__m128d>().write_unaligned(a);
2652}
2653
2654#[inline]
2660#[target_feature(enable = "sse2")]
2661#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2662pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2663 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2664}
2665
2666#[inline]
2672#[target_feature(enable = "sse2")]
2673#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2674pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2675 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2676}
2677
2678#[inline]
2684#[target_feature(enable = "sse2")]
2685#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2686pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2687 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2688}
2689
2690#[inline]
2696#[target_feature(enable = "sse2")]
2697#[stable(feature = "simd_x86", since = "1.27.0")]
2698#[allow(clippy::cast_ptr_alignment)]
2699pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2700 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2701 *(mem_addr as *mut __m128d) = b;
2702}
2703
2704#[inline]
2710#[target_feature(enable = "sse2")]
2711#[stable(feature = "simd_x86", since = "1.27.0")]
2712#[allow(clippy::cast_ptr_alignment)]
2713pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2714 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2715 *(mem_addr as *mut __m128d) = b;
2716}
2717
2718#[inline]
2725#[target_feature(enable = "sse2")]
2726#[stable(feature = "simd_x86", since = "1.27.0")]
2727#[allow(clippy::cast_ptr_alignment)]
2728pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2729 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2730 *(mem_addr as *mut __m128d) = b;
2731}
2732
2733#[inline]
2738#[target_feature(enable = "sse2")]
2739#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movhps))]
2740#[stable(feature = "simd_x86", since = "1.27.0")]
2741pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2742 *mem_addr = simd_extract!(a, 1);
2743}
2744
2745#[inline]
2750#[target_feature(enable = "sse2")]
2751#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
2752#[stable(feature = "simd_x86", since = "1.27.0")]
2753pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2754 *mem_addr = simd_extract!(a, 0);
2755}
2756
2757#[inline]
2762#[target_feature(enable = "sse2")]
2763#[stable(feature = "simd_x86", since = "1.27.0")]
2765pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2766 let d = *mem_addr;
2767 _mm_setr_pd(d, d)
2768}
2769
2770#[inline]
2775#[target_feature(enable = "sse2")]
2776#[stable(feature = "simd_x86", since = "1.27.0")]
2778pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2779 _mm_load1_pd(mem_addr)
2780}
2781
2782#[inline]
2788#[target_feature(enable = "sse2")]
2789#[cfg_attr(test, assert_instr(movaps))]
2790#[stable(feature = "simd_x86", since = "1.27.0")]
2791pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2792 let a = _mm_load_pd(mem_addr);
2793 simd_shuffle!(a, a, [1, 0])
2794}
2795
2796#[inline]
2802#[target_feature(enable = "sse2")]
2803#[cfg_attr(test, assert_instr(movups))]
2804#[stable(feature = "simd_x86", since = "1.27.0")]
2805pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2806 let mut dst = _mm_undefined_pd();
2807 ptr::copy_nonoverlapping(
2808 mem_addr as *const u8,
2809 ptr::addr_of_mut!(dst) as *mut u8,
2810 mem::size_of::<__m128d>(),
2811 );
2812 dst
2813}
2814
2815#[inline]
2821#[target_feature(enable = "sse2")]
2822#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2823pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2824 transmute(i16x8::new(
2825 ptr::read_unaligned(mem_addr as *const i16),
2826 0,
2827 0,
2828 0,
2829 0,
2830 0,
2831 0,
2832 0,
2833 ))
2834}
2835
2836#[inline]
2842#[target_feature(enable = "sse2")]
2843#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2844pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2845 transmute(i32x4::new(
2846 ptr::read_unaligned(mem_addr as *const i32),
2847 0,
2848 0,
2849 0,
2850 ))
2851}
2852
2853#[inline]
2859#[target_feature(enable = "sse2")]
2860#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2861pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2862 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2863}
2864
2865#[inline]
2871#[target_feature(enable = "sse2")]
2872#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
2873#[rustc_legacy_const_generics(2)]
2874#[stable(feature = "simd_x86", since = "1.27.0")]
2875pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
2876 static_assert_uimm_bits!(MASK, 8);
2877 unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
2878}
2879
2880#[inline]
2886#[target_feature(enable = "sse2")]
2887#[cfg_attr(test, assert_instr(movsd))]
2888#[stable(feature = "simd_x86", since = "1.27.0")]
2889pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2890 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2891}
2892
2893#[inline]
2898#[target_feature(enable = "sse2")]
2899#[stable(feature = "simd_x86", since = "1.27.0")]
2900pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
2901 unsafe { transmute(a) }
2902}
2903
2904#[inline]
2909#[target_feature(enable = "sse2")]
2910#[stable(feature = "simd_x86", since = "1.27.0")]
2911pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
2912 unsafe { transmute(a) }
2913}
2914
2915#[inline]
2920#[target_feature(enable = "sse2")]
2921#[stable(feature = "simd_x86", since = "1.27.0")]
2922pub fn _mm_castps_pd(a: __m128) -> __m128d {
2923 unsafe { transmute(a) }
2924}
2925
2926#[inline]
2931#[target_feature(enable = "sse2")]
2932#[stable(feature = "simd_x86", since = "1.27.0")]
2933pub fn _mm_castps_si128(a: __m128) -> __m128i {
2934 unsafe { transmute(a) }
2935}
2936
2937#[inline]
2942#[target_feature(enable = "sse2")]
2943#[stable(feature = "simd_x86", since = "1.27.0")]
2944pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2945 unsafe { transmute(a) }
2946}
2947
2948#[inline]
2953#[target_feature(enable = "sse2")]
2954#[stable(feature = "simd_x86", since = "1.27.0")]
2955pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2956 unsafe { transmute(a) }
2957}
2958
2959#[inline]
2966#[target_feature(enable = "sse2")]
2967#[stable(feature = "simd_x86", since = "1.27.0")]
2968pub fn _mm_undefined_pd() -> __m128d {
2969 const { unsafe { mem::zeroed() } }
2970}
2971
2972#[inline]
2979#[target_feature(enable = "sse2")]
2980#[stable(feature = "simd_x86", since = "1.27.0")]
2981pub fn _mm_undefined_si128() -> __m128i {
2982 const { unsafe { mem::zeroed() } }
2983}
2984
/// Interleaves the high lanes of the two inputs.
///
/// Result lane 0 is lane 1 of `a`; result lane 1 is lane 1 of `b`
/// (shuffle index 3 addresses lane 1 of the second operand).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}
2999
/// Interleaves the low lanes of the two inputs.
///
/// Result lane 0 is lane 0 of `a`; result lane 1 is lane 0 of `b`
/// (shuffle index 2 addresses lane 0 of the second operand).
#[inline]
#[target_feature(enable = "sse2")]
// The instruction assertion is skipped on MSVC targets.
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}
3014
// Bindings to the LLVM `llvm.x86.sse2.*` intrinsics used by the wrappers in
// this module. Each declaration is tied to its LLVM intrinsic through
// `link_name`.
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD
// vector types in these signatures are not considered FFI-safe by the lint.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // --- Spin-wait hint, cache-line flush and memory fences ---
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    // --- Integer multiply-add and sum of absolute differences ---
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    // --- Integer shifts whose count is supplied as a vector operand ---
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    // --- Single-precision to 32-bit integer conversion ---
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    // --- Byte-masked store ---
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    // --- Narrowing packs ---
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    // --- Double-precision min/max ---
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    // --- Double-precision comparisons; `imm8` selects the predicate ---
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    // --- Scalar double comparisons returning an integer result (comi*) ---
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    // --- Scalar double comparisons returning an integer result (ucomi*) ---
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    // --- Floating-point conversions ---
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtss2sd"]
    fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
}
3106
3107#[cfg(test)]
3108mod tests {
3109 use crate::{
3110 core_arch::{simd::*, x86::*},
3111 hint::black_box,
3112 };
3113 use std::{
3114 boxed, f32, f64,
3115 mem::{self, transmute},
3116 ptr,
3117 };
3118 use stdarch_test::simd_test;
3119
// Short alias for `f64::NAN`; presumably used by the floating-point tests
// further down in this module.
const NAN: f64 = f64::NAN;
3121
// Smoke test: `_mm_pause` can be called; there is no result to check.
#[test]
fn test_mm_pause() {
    unsafe {
        _mm_pause();
    }
}
3126
3127 #[simd_test(enable = "sse2")]
3128 unsafe fn test_mm_clflush() {
3129 let x = 0_u8;
3130 _mm_clflush(ptr::addr_of!(x));
3131 }
3132
// Smoke test: `_mm_lfence` can be called. Ignored when running under Miri.
#[simd_test(enable = "sse2")]
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_lfence() {
    _mm_lfence();
}
3139
// Smoke test: `_mm_mfence` can be called. Ignored when running under Miri.
#[simd_test(enable = "sse2")]
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_mfence() {
    _mm_mfence();
}
3146
3147 #[simd_test(enable = "sse2")]
3148 unsafe fn test_mm_add_epi8() {
3149 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3150 #[rustfmt::skip]
3151 let b = _mm_setr_epi8(
3152 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3153 );
3154 let r = _mm_add_epi8(a, b);
3155 #[rustfmt::skip]
3156 let e = _mm_setr_epi8(
3157 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3158 );
3159 assert_eq_m128i(r, e);
3160 }
3161
3162 #[simd_test(enable = "sse2")]
3163 unsafe fn test_mm_add_epi8_overflow() {
3164 let a = _mm_set1_epi8(0x7F);
3165 let b = _mm_set1_epi8(1);
3166 let r = _mm_add_epi8(a, b);
3167 assert_eq_m128i(r, _mm_set1_epi8(-128));
3168 }
3169
3170 #[simd_test(enable = "sse2")]
3171 unsafe fn test_mm_add_epi16() {
3172 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3173 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3174 let r = _mm_add_epi16(a, b);
3175 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3176 assert_eq_m128i(r, e);
3177 }
3178
3179 #[simd_test(enable = "sse2")]
3180 unsafe fn test_mm_add_epi32() {
3181 let a = _mm_setr_epi32(0, 1, 2, 3);
3182 let b = _mm_setr_epi32(4, 5, 6, 7);
3183 let r = _mm_add_epi32(a, b);
3184 let e = _mm_setr_epi32(4, 6, 8, 10);
3185 assert_eq_m128i(r, e);
3186 }
3187
3188 #[simd_test(enable = "sse2")]
3189 unsafe fn test_mm_add_epi64() {
3190 let a = _mm_setr_epi64x(0, 1);
3191 let b = _mm_setr_epi64x(2, 3);
3192 let r = _mm_add_epi64(a, b);
3193 let e = _mm_setr_epi64x(2, 4);
3194 assert_eq_m128i(r, e);
3195 }
3196
3197 #[simd_test(enable = "sse2")]
3198 unsafe fn test_mm_adds_epi8() {
3199 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3200 #[rustfmt::skip]
3201 let b = _mm_setr_epi8(
3202 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3203 );
3204 let r = _mm_adds_epi8(a, b);
3205 #[rustfmt::skip]
3206 let e = _mm_setr_epi8(
3207 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3208 );
3209 assert_eq_m128i(r, e);
3210 }
3211
3212 #[simd_test(enable = "sse2")]
3213 unsafe fn test_mm_adds_epi8_saturate_positive() {
3214 let a = _mm_set1_epi8(0x7F);
3215 let b = _mm_set1_epi8(1);
3216 let r = _mm_adds_epi8(a, b);
3217 assert_eq_m128i(r, a);
3218 }
3219
3220 #[simd_test(enable = "sse2")]
3221 unsafe fn test_mm_adds_epi8_saturate_negative() {
3222 let a = _mm_set1_epi8(-0x80);
3223 let b = _mm_set1_epi8(-1);
3224 let r = _mm_adds_epi8(a, b);
3225 assert_eq_m128i(r, a);
3226 }
3227
3228 #[simd_test(enable = "sse2")]
3229 unsafe fn test_mm_adds_epi16() {
3230 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3231 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3232 let r = _mm_adds_epi16(a, b);
3233 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3234 assert_eq_m128i(r, e);
3235 }
3236
3237 #[simd_test(enable = "sse2")]
3238 unsafe fn test_mm_adds_epi16_saturate_positive() {
3239 let a = _mm_set1_epi16(0x7FFF);
3240 let b = _mm_set1_epi16(1);
3241 let r = _mm_adds_epi16(a, b);
3242 assert_eq_m128i(r, a);
3243 }
3244
3245 #[simd_test(enable = "sse2")]
3246 unsafe fn test_mm_adds_epi16_saturate_negative() {
3247 let a = _mm_set1_epi16(-0x8000);
3248 let b = _mm_set1_epi16(-1);
3249 let r = _mm_adds_epi16(a, b);
3250 assert_eq_m128i(r, a);
3251 }
3252
3253 #[simd_test(enable = "sse2")]
3254 unsafe fn test_mm_adds_epu8() {
3255 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3256 #[rustfmt::skip]
3257 let b = _mm_setr_epi8(
3258 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3259 );
3260 let r = _mm_adds_epu8(a, b);
3261 #[rustfmt::skip]
3262 let e = _mm_setr_epi8(
3263 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3264 );
3265 assert_eq_m128i(r, e);
3266 }
3267
3268 #[simd_test(enable = "sse2")]
3269 unsafe fn test_mm_adds_epu8_saturate() {
3270 let a = _mm_set1_epi8(!0);
3271 let b = _mm_set1_epi8(1);
3272 let r = _mm_adds_epu8(a, b);
3273 assert_eq_m128i(r, a);
3274 }
3275
3276 #[simd_test(enable = "sse2")]
3277 unsafe fn test_mm_adds_epu16() {
3278 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3279 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3280 let r = _mm_adds_epu16(a, b);
3281 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3282 assert_eq_m128i(r, e);
3283 }
3284
3285 #[simd_test(enable = "sse2")]
3286 unsafe fn test_mm_adds_epu16_saturate() {
3287 let a = _mm_set1_epi16(!0);
3288 let b = _mm_set1_epi16(1);
3289 let r = _mm_adds_epu16(a, b);
3290 assert_eq_m128i(r, a);
3291 }
3292
3293 #[simd_test(enable = "sse2")]
3294 unsafe fn test_mm_avg_epu8() {
3295 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3296 let r = _mm_avg_epu8(a, b);
3297 assert_eq_m128i(r, _mm_set1_epi8(6));
3298 }
3299
3300 #[simd_test(enable = "sse2")]
3301 unsafe fn test_mm_avg_epu16() {
3302 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3303 let r = _mm_avg_epu16(a, b);
3304 assert_eq_m128i(r, _mm_set1_epi16(6));
3305 }
3306
3307 #[simd_test(enable = "sse2")]
3308 unsafe fn test_mm_madd_epi16() {
3309 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3310 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3311 let r = _mm_madd_epi16(a, b);
3312 let e = _mm_setr_epi32(29, 81, 149, 233);
3313 assert_eq_m128i(r, e);
3314
3315 let a = _mm_setr_epi16(
3318 i16::MAX,
3319 i16::MAX,
3320 i16::MIN,
3321 i16::MIN,
3322 i16::MIN,
3323 i16::MAX,
3324 0,
3325 0,
3326 );
3327 let b = _mm_setr_epi16(
3328 i16::MAX,
3329 i16::MAX,
3330 i16::MIN,
3331 i16::MIN,
3332 i16::MAX,
3333 i16::MIN,
3334 0,
3335 0,
3336 );
3337 let r = _mm_madd_epi16(a, b);
3338 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3339 assert_eq_m128i(r, e);
3340 }
3341
3342 #[simd_test(enable = "sse2")]
3343 unsafe fn test_mm_max_epi16() {
3344 let a = _mm_set1_epi16(1);
3345 let b = _mm_set1_epi16(-1);
3346 let r = _mm_max_epi16(a, b);
3347 assert_eq_m128i(r, a);
3348 }
3349
3350 #[simd_test(enable = "sse2")]
3351 unsafe fn test_mm_max_epu8() {
3352 let a = _mm_set1_epi8(1);
3353 let b = _mm_set1_epi8(!0);
3354 let r = _mm_max_epu8(a, b);
3355 assert_eq_m128i(r, b);
3356 }
3357
3358 #[simd_test(enable = "sse2")]
3359 unsafe fn test_mm_min_epi16() {
3360 let a = _mm_set1_epi16(1);
3361 let b = _mm_set1_epi16(-1);
3362 let r = _mm_min_epi16(a, b);
3363 assert_eq_m128i(r, b);
3364 }
3365
3366 #[simd_test(enable = "sse2")]
3367 unsafe fn test_mm_min_epu8() {
3368 let a = _mm_set1_epi8(1);
3369 let b = _mm_set1_epi8(!0);
3370 let r = _mm_min_epu8(a, b);
3371 assert_eq_m128i(r, a);
3372 }
3373
3374 #[simd_test(enable = "sse2")]
3375 unsafe fn test_mm_mulhi_epi16() {
3376 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3377 let r = _mm_mulhi_epi16(a, b);
3378 assert_eq_m128i(r, _mm_set1_epi16(-16));
3379 }
3380
3381 #[simd_test(enable = "sse2")]
3382 unsafe fn test_mm_mulhi_epu16() {
3383 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3384 let r = _mm_mulhi_epu16(a, b);
3385 assert_eq_m128i(r, _mm_set1_epi16(15));
3386 }
3387
3388 #[simd_test(enable = "sse2")]
3389 unsafe fn test_mm_mullo_epi16() {
3390 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3391 let r = _mm_mullo_epi16(a, b);
3392 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3393 }
3394
3395 #[simd_test(enable = "sse2")]
3396 unsafe fn test_mm_mul_epu32() {
3397 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3398 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3399 let r = _mm_mul_epu32(a, b);
3400 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3401 assert_eq_m128i(r, e);
3402 }
3403
3404 #[simd_test(enable = "sse2")]
3405 unsafe fn test_mm_sad_epu8() {
3406 #[rustfmt::skip]
3407 let a = _mm_setr_epi8(
3408 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3409 1, 2, 3, 4,
3410 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3411 1, 2, 3, 4,
3412 );
3413 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3414 let r = _mm_sad_epu8(a, b);
3415 let e = _mm_setr_epi64x(1020, 614);
3416 assert_eq_m128i(r, e);
3417 }
3418
3419 #[simd_test(enable = "sse2")]
3420 unsafe fn test_mm_sub_epi8() {
3421 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3422 let r = _mm_sub_epi8(a, b);
3423 assert_eq_m128i(r, _mm_set1_epi8(-1));
3424 }
3425
3426 #[simd_test(enable = "sse2")]
3427 unsafe fn test_mm_sub_epi16() {
3428 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3429 let r = _mm_sub_epi16(a, b);
3430 assert_eq_m128i(r, _mm_set1_epi16(-1));
3431 }
3432
3433 #[simd_test(enable = "sse2")]
3434 unsafe fn test_mm_sub_epi32() {
3435 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3436 let r = _mm_sub_epi32(a, b);
3437 assert_eq_m128i(r, _mm_set1_epi32(-1));
3438 }
3439
3440 #[simd_test(enable = "sse2")]
3441 unsafe fn test_mm_sub_epi64() {
3442 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3443 let r = _mm_sub_epi64(a, b);
3444 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3445 }
3446
3447 #[simd_test(enable = "sse2")]
3448 unsafe fn test_mm_subs_epi8() {
3449 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3450 let r = _mm_subs_epi8(a, b);
3451 assert_eq_m128i(r, _mm_set1_epi8(3));
3452 }
3453
3454 #[simd_test(enable = "sse2")]
3455 unsafe fn test_mm_subs_epi8_saturate_positive() {
3456 let a = _mm_set1_epi8(0x7F);
3457 let b = _mm_set1_epi8(-1);
3458 let r = _mm_subs_epi8(a, b);
3459 assert_eq_m128i(r, a);
3460 }
3461
3462 #[simd_test(enable = "sse2")]
3463 unsafe fn test_mm_subs_epi8_saturate_negative() {
3464 let a = _mm_set1_epi8(-0x80);
3465 let b = _mm_set1_epi8(1);
3466 let r = _mm_subs_epi8(a, b);
3467 assert_eq_m128i(r, a);
3468 }
3469
3470 #[simd_test(enable = "sse2")]
3471 unsafe fn test_mm_subs_epi16() {
3472 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3473 let r = _mm_subs_epi16(a, b);
3474 assert_eq_m128i(r, _mm_set1_epi16(3));
3475 }
3476
3477 #[simd_test(enable = "sse2")]
3478 unsafe fn test_mm_subs_epi16_saturate_positive() {
3479 let a = _mm_set1_epi16(0x7FFF);
3480 let b = _mm_set1_epi16(-1);
3481 let r = _mm_subs_epi16(a, b);
3482 assert_eq_m128i(r, a);
3483 }
3484
3485 #[simd_test(enable = "sse2")]
3486 unsafe fn test_mm_subs_epi16_saturate_negative() {
3487 let a = _mm_set1_epi16(-0x8000);
3488 let b = _mm_set1_epi16(1);
3489 let r = _mm_subs_epi16(a, b);
3490 assert_eq_m128i(r, a);
3491 }
3492
3493 #[simd_test(enable = "sse2")]
3494 unsafe fn test_mm_subs_epu8() {
3495 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3496 let r = _mm_subs_epu8(a, b);
3497 assert_eq_m128i(r, _mm_set1_epi8(3));
3498 }
3499
3500 #[simd_test(enable = "sse2")]
3501 unsafe fn test_mm_subs_epu8_saturate() {
3502 let a = _mm_set1_epi8(0);
3503 let b = _mm_set1_epi8(1);
3504 let r = _mm_subs_epu8(a, b);
3505 assert_eq_m128i(r, a);
3506 }
3507
3508 #[simd_test(enable = "sse2")]
3509 unsafe fn test_mm_subs_epu16() {
3510 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3511 let r = _mm_subs_epu16(a, b);
3512 assert_eq_m128i(r, _mm_set1_epi16(3));
3513 }
3514
3515 #[simd_test(enable = "sse2")]
3516 unsafe fn test_mm_subs_epu16_saturate() {
3517 let a = _mm_set1_epi16(0);
3518 let b = _mm_set1_epi16(1);
3519 let r = _mm_subs_epu16(a, b);
3520 assert_eq_m128i(r, a);
3521 }
3522
3523 #[simd_test(enable = "sse2")]
3524 unsafe fn test_mm_slli_si128() {
3525 #[rustfmt::skip]
3526 let a = _mm_setr_epi8(
3527 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3528 );
3529 let r = _mm_slli_si128::<1>(a);
3530 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3531 assert_eq_m128i(r, e);
3532
3533 #[rustfmt::skip]
3534 let a = _mm_setr_epi8(
3535 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3536 );
3537 let r = _mm_slli_si128::<15>(a);
3538 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3539 assert_eq_m128i(r, e);
3540
3541 #[rustfmt::skip]
3542 let a = _mm_setr_epi8(
3543 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3544 );
3545 let r = _mm_slli_si128::<16>(a);
3546 assert_eq_m128i(r, _mm_set1_epi8(0));
3547 }
3548
3549 #[simd_test(enable = "sse2")]
3550 unsafe fn test_mm_slli_epi16() {
3551 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3552 let r = _mm_slli_epi16::<4>(a);
3553 assert_eq_m128i(
3554 r,
3555 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3556 );
3557 let r = _mm_slli_epi16::<16>(a);
3558 assert_eq_m128i(r, _mm_set1_epi16(0));
3559 }
3560
3561 #[simd_test(enable = "sse2")]
3562 unsafe fn test_mm_sll_epi16() {
3563 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3564 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3565 assert_eq_m128i(
3566 r,
3567 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3568 );
3569 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3570 assert_eq_m128i(r, a);
3571 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3572 assert_eq_m128i(r, _mm_set1_epi16(0));
3573 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3574 assert_eq_m128i(r, _mm_set1_epi16(0));
3575 }
3576
3577 #[simd_test(enable = "sse2")]
3578 unsafe fn test_mm_slli_epi32() {
3579 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3580 let r = _mm_slli_epi32::<4>(a);
3581 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3582 let r = _mm_slli_epi32::<32>(a);
3583 assert_eq_m128i(r, _mm_set1_epi32(0));
3584 }
3585
3586 #[simd_test(enable = "sse2")]
3587 unsafe fn test_mm_sll_epi32() {
3588 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3589 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3590 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3591 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3592 assert_eq_m128i(r, a);
3593 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3594 assert_eq_m128i(r, _mm_set1_epi32(0));
3595 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3596 assert_eq_m128i(r, _mm_set1_epi32(0));
3597 }
3598
3599 #[simd_test(enable = "sse2")]
3600 unsafe fn test_mm_slli_epi64() {
3601 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3602 let r = _mm_slli_epi64::<4>(a);
3603 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3604 let r = _mm_slli_epi64::<64>(a);
3605 assert_eq_m128i(r, _mm_set1_epi64x(0));
3606 }
3607
3608 #[simd_test(enable = "sse2")]
3609 unsafe fn test_mm_sll_epi64() {
3610 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3611 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3612 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3613 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3614 assert_eq_m128i(r, a);
3615 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3616 assert_eq_m128i(r, _mm_set1_epi64x(0));
3617 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3618 assert_eq_m128i(r, _mm_set1_epi64x(0));
3619 }
3620
3621 #[simd_test(enable = "sse2")]
3622 unsafe fn test_mm_srai_epi16() {
3623 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3624 let r = _mm_srai_epi16::<4>(a);
3625 assert_eq_m128i(
3626 r,
3627 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3628 );
3629 let r = _mm_srai_epi16::<16>(a);
3630 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3631 }
3632
3633 #[simd_test(enable = "sse2")]
3634 unsafe fn test_mm_sra_epi16() {
3635 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3636 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3637 assert_eq_m128i(
3638 r,
3639 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3640 );
3641 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3642 assert_eq_m128i(r, a);
3643 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3644 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3645 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3646 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3647 }
3648
3649 #[simd_test(enable = "sse2")]
3650 unsafe fn test_mm_srai_epi32() {
3651 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3652 let r = _mm_srai_epi32::<4>(a);
3653 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3654 let r = _mm_srai_epi32::<32>(a);
3655 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3656 }
3657
3658 #[simd_test(enable = "sse2")]
3659 unsafe fn test_mm_sra_epi32() {
3660 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3661 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3662 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3663 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3664 assert_eq_m128i(r, a);
3665 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3666 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3667 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3668 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3669 }
3670
3671 #[simd_test(enable = "sse2")]
3672 unsafe fn test_mm_srli_si128() {
3673 #[rustfmt::skip]
3674 let a = _mm_setr_epi8(
3675 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3676 );
3677 let r = _mm_srli_si128::<1>(a);
3678 #[rustfmt::skip]
3679 let e = _mm_setr_epi8(
3680 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3681 );
3682 assert_eq_m128i(r, e);
3683
3684 #[rustfmt::skip]
3685 let a = _mm_setr_epi8(
3686 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3687 );
3688 let r = _mm_srli_si128::<15>(a);
3689 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3690 assert_eq_m128i(r, e);
3691
3692 #[rustfmt::skip]
3693 let a = _mm_setr_epi8(
3694 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3695 );
3696 let r = _mm_srli_si128::<16>(a);
3697 assert_eq_m128i(r, _mm_set1_epi8(0));
3698 }
3699
3700 #[simd_test(enable = "sse2")]
3701 unsafe fn test_mm_srli_epi16() {
3702 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3703 let r = _mm_srli_epi16::<4>(a);
3704 assert_eq_m128i(
3705 r,
3706 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3707 );
3708 let r = _mm_srli_epi16::<16>(a);
3709 assert_eq_m128i(r, _mm_set1_epi16(0));
3710 }
3711
3712 #[simd_test(enable = "sse2")]
3713 unsafe fn test_mm_srl_epi16() {
3714 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3715 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3716 assert_eq_m128i(
3717 r,
3718 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3719 );
3720 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3721 assert_eq_m128i(r, a);
3722 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3723 assert_eq_m128i(r, _mm_set1_epi16(0));
3724 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3725 assert_eq_m128i(r, _mm_set1_epi16(0));
3726 }
3727
3728 #[simd_test(enable = "sse2")]
3729 unsafe fn test_mm_srli_epi32() {
3730 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3731 let r = _mm_srli_epi32::<4>(a);
3732 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3733 let r = _mm_srli_epi32::<32>(a);
3734 assert_eq_m128i(r, _mm_set1_epi32(0));
3735 }
3736
3737 #[simd_test(enable = "sse2")]
3738 unsafe fn test_mm_srl_epi32() {
3739 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3740 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3741 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3742 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3743 assert_eq_m128i(r, a);
3744 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3745 assert_eq_m128i(r, _mm_set1_epi32(0));
3746 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3747 assert_eq_m128i(r, _mm_set1_epi32(0));
3748 }
3749
3750 #[simd_test(enable = "sse2")]
3751 unsafe fn test_mm_srli_epi64() {
3752 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3753 let r = _mm_srli_epi64::<4>(a);
3754 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3755 let r = _mm_srli_epi64::<64>(a);
3756 assert_eq_m128i(r, _mm_set1_epi64x(0));
3757 }
3758
3759 #[simd_test(enable = "sse2")]
3760 unsafe fn test_mm_srl_epi64() {
3761 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3762 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3763 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3764 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3765 assert_eq_m128i(r, a);
3766 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3767 assert_eq_m128i(r, _mm_set1_epi64x(0));
3768 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3769 assert_eq_m128i(r, _mm_set1_epi64x(0));
3770 }
3771
3772 #[simd_test(enable = "sse2")]
3773 unsafe fn test_mm_and_si128() {
3774 let a = _mm_set1_epi8(5);
3775 let b = _mm_set1_epi8(3);
3776 let r = _mm_and_si128(a, b);
3777 assert_eq_m128i(r, _mm_set1_epi8(1));
3778 }
3779
3780 #[simd_test(enable = "sse2")]
3781 unsafe fn test_mm_andnot_si128() {
3782 let a = _mm_set1_epi8(5);
3783 let b = _mm_set1_epi8(3);
3784 let r = _mm_andnot_si128(a, b);
3785 assert_eq_m128i(r, _mm_set1_epi8(2));
3786 }
3787
3788 #[simd_test(enable = "sse2")]
3789 unsafe fn test_mm_or_si128() {
3790 let a = _mm_set1_epi8(5);
3791 let b = _mm_set1_epi8(3);
3792 let r = _mm_or_si128(a, b);
3793 assert_eq_m128i(r, _mm_set1_epi8(7));
3794 }
3795
3796 #[simd_test(enable = "sse2")]
3797 unsafe fn test_mm_xor_si128() {
3798 let a = _mm_set1_epi8(5);
3799 let b = _mm_set1_epi8(3);
3800 let r = _mm_xor_si128(a, b);
3801 assert_eq_m128i(r, _mm_set1_epi8(6));
3802 }
3803
3804 #[simd_test(enable = "sse2")]
3805 unsafe fn test_mm_cmpeq_epi8() {
3806 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3807 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3808 let r = _mm_cmpeq_epi8(a, b);
3809 #[rustfmt::skip]
3810 assert_eq_m128i(
3811 r,
3812 _mm_setr_epi8(
3813 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3814 )
3815 );
3816 }
3817
3818 #[simd_test(enable = "sse2")]
3819 unsafe fn test_mm_cmpeq_epi16() {
3820 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3821 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3822 let r = _mm_cmpeq_epi16(a, b);
3823 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3824 }
3825
3826 #[simd_test(enable = "sse2")]
3827 unsafe fn test_mm_cmpeq_epi32() {
3828 let a = _mm_setr_epi32(0, 1, 2, 3);
3829 let b = _mm_setr_epi32(3, 2, 2, 0);
3830 let r = _mm_cmpeq_epi32(a, b);
3831 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3832 }
3833
3834 #[simd_test(enable = "sse2")]
3835 unsafe fn test_mm_cmpgt_epi8() {
3836 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3837 let b = _mm_set1_epi8(0);
3838 let r = _mm_cmpgt_epi8(a, b);
3839 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3840 assert_eq_m128i(r, e);
3841 }
3842
3843 #[simd_test(enable = "sse2")]
3844 unsafe fn test_mm_cmpgt_epi16() {
3845 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3846 let b = _mm_set1_epi16(0);
3847 let r = _mm_cmpgt_epi16(a, b);
3848 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3849 assert_eq_m128i(r, e);
3850 }
3851
3852 #[simd_test(enable = "sse2")]
3853 unsafe fn test_mm_cmpgt_epi32() {
3854 let a = _mm_set_epi32(5, 0, 0, 0);
3855 let b = _mm_set1_epi32(0);
3856 let r = _mm_cmpgt_epi32(a, b);
3857 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3858 }
3859
3860 #[simd_test(enable = "sse2")]
3861 unsafe fn test_mm_cmplt_epi8() {
3862 let a = _mm_set1_epi8(0);
3863 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3864 let r = _mm_cmplt_epi8(a, b);
3865 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3866 assert_eq_m128i(r, e);
3867 }
3868
3869 #[simd_test(enable = "sse2")]
3870 unsafe fn test_mm_cmplt_epi16() {
3871 let a = _mm_set1_epi16(0);
3872 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3873 let r = _mm_cmplt_epi16(a, b);
3874 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3875 assert_eq_m128i(r, e);
3876 }
3877
3878 #[simd_test(enable = "sse2")]
3879 unsafe fn test_mm_cmplt_epi32() {
3880 let a = _mm_set1_epi32(0);
3881 let b = _mm_set_epi32(5, 0, 0, 0);
3882 let r = _mm_cmplt_epi32(a, b);
3883 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3884 }
3885
3886 #[simd_test(enable = "sse2")]
3887 unsafe fn test_mm_cvtepi32_pd() {
3888 let a = _mm_set_epi32(35, 25, 15, 5);
3889 let r = _mm_cvtepi32_pd(a);
3890 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3891 }
3892
3893 #[simd_test(enable = "sse2")]
3894 unsafe fn test_mm_cvtsi32_sd() {
3895 let a = _mm_set1_pd(3.5);
3896 let r = _mm_cvtsi32_sd(a, 5);
3897 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3898 }
3899
3900 #[simd_test(enable = "sse2")]
3901 unsafe fn test_mm_cvtepi32_ps() {
3902 let a = _mm_setr_epi32(1, 2, 3, 4);
3903 let r = _mm_cvtepi32_ps(a);
3904 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3905 }
3906
3907 #[simd_test(enable = "sse2")]
3908 unsafe fn test_mm_cvtps_epi32() {
3909 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3910 let r = _mm_cvtps_epi32(a);
3911 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3912 }
3913
3914 #[simd_test(enable = "sse2")]
3915 unsafe fn test_mm_cvtsi32_si128() {
3916 let r = _mm_cvtsi32_si128(5);
3917 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3918 }
3919
3920 #[simd_test(enable = "sse2")]
3921 unsafe fn test_mm_cvtsi128_si32() {
3922 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3923 assert_eq!(r, 5);
3924 }
3925
3926 #[simd_test(enable = "sse2")]
3927 unsafe fn test_mm_set_epi64x() {
3928 let r = _mm_set_epi64x(0, 1);
3929 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3930 }
3931
3932 #[simd_test(enable = "sse2")]
3933 unsafe fn test_mm_set_epi32() {
3934 let r = _mm_set_epi32(0, 1, 2, 3);
3935 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3936 }
3937
3938 #[simd_test(enable = "sse2")]
3939 unsafe fn test_mm_set_epi16() {
3940 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3941 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3942 }
3943
3944 #[simd_test(enable = "sse2")]
3945 unsafe fn test_mm_set_epi8() {
3946 #[rustfmt::skip]
3947 let r = _mm_set_epi8(
3948 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3949 );
3950 #[rustfmt::skip]
3951 let e = _mm_setr_epi8(
3952 15, 14, 13, 12, 11, 10, 9, 8,
3953 7, 6, 5, 4, 3, 2, 1, 0,
3954 );
3955 assert_eq_m128i(r, e);
3956 }
3957
3958 #[simd_test(enable = "sse2")]
3959 unsafe fn test_mm_set1_epi64x() {
3960 let r = _mm_set1_epi64x(1);
3961 assert_eq_m128i(r, _mm_set1_epi64x(1));
3962 }
3963
3964 #[simd_test(enable = "sse2")]
3965 unsafe fn test_mm_set1_epi32() {
3966 let r = _mm_set1_epi32(1);
3967 assert_eq_m128i(r, _mm_set1_epi32(1));
3968 }
3969
3970 #[simd_test(enable = "sse2")]
3971 unsafe fn test_mm_set1_epi16() {
3972 let r = _mm_set1_epi16(1);
3973 assert_eq_m128i(r, _mm_set1_epi16(1));
3974 }
3975
3976 #[simd_test(enable = "sse2")]
3977 unsafe fn test_mm_set1_epi8() {
3978 let r = _mm_set1_epi8(1);
3979 assert_eq_m128i(r, _mm_set1_epi8(1));
3980 }
3981
3982 #[simd_test(enable = "sse2")]
3983 unsafe fn test_mm_setr_epi32() {
3984 let r = _mm_setr_epi32(0, 1, 2, 3);
3985 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
3986 }
3987
3988 #[simd_test(enable = "sse2")]
3989 unsafe fn test_mm_setr_epi16() {
3990 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3991 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
3992 }
3993
3994 #[simd_test(enable = "sse2")]
3995 unsafe fn test_mm_setr_epi8() {
3996 #[rustfmt::skip]
3997 let r = _mm_setr_epi8(
3998 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3999 );
4000 #[rustfmt::skip]
4001 let e = _mm_setr_epi8(
4002 0, 1, 2, 3, 4, 5, 6, 7,
4003 8, 9, 10, 11, 12, 13, 14, 15,
4004 );
4005 assert_eq_m128i(r, e);
4006 }
4007
4008 #[simd_test(enable = "sse2")]
4009 unsafe fn test_mm_setzero_si128() {
4010 let r = _mm_setzero_si128();
4011 assert_eq_m128i(r, _mm_set1_epi64x(0));
4012 }
4013
4014 #[simd_test(enable = "sse2")]
4015 unsafe fn test_mm_loadl_epi64() {
4016 let a = _mm_setr_epi64x(6, 5);
4017 let r = _mm_loadl_epi64(ptr::addr_of!(a));
4018 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4019 }
4020
4021 #[simd_test(enable = "sse2")]
4022 unsafe fn test_mm_load_si128() {
4023 let a = _mm_set_epi64x(5, 6);
4024 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4025 assert_eq_m128i(a, r);
4026 }
4027
4028 #[simd_test(enable = "sse2")]
4029 unsafe fn test_mm_loadu_si128() {
4030 let a = _mm_set_epi64x(5, 6);
4031 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4032 assert_eq_m128i(a, r);
4033 }
4034
4035 #[simd_test(enable = "sse2")]
4036 #[cfg_attr(miri, ignore)]
4039 unsafe fn test_mm_maskmoveu_si128() {
4040 let a = _mm_set1_epi8(9);
4041 #[rustfmt::skip]
4042 let mask = _mm_set_epi8(
4043 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4044 0, 0, 0, 0, 0, 0, 0, 0,
4045 );
4046 let mut r = _mm_set1_epi8(0);
4047 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4048 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4049 assert_eq_m128i(r, e);
4050 }
4051
4052 #[simd_test(enable = "sse2")]
4053 unsafe fn test_mm_store_si128() {
4054 let a = _mm_set1_epi8(9);
4055 let mut r = _mm_set1_epi8(0);
4056 _mm_store_si128(&mut r, a);
4057 assert_eq_m128i(r, a);
4058 }
4059
4060 #[simd_test(enable = "sse2")]
4061 unsafe fn test_mm_storeu_si128() {
4062 let a = _mm_set1_epi8(9);
4063 let mut r = _mm_set1_epi8(0);
4064 _mm_storeu_si128(&mut r, a);
4065 assert_eq_m128i(r, a);
4066 }
4067
4068 #[simd_test(enable = "sse2")]
4069 unsafe fn test_mm_storel_epi64() {
4070 let a = _mm_setr_epi64x(2, 9);
4071 let mut r = _mm_set1_epi8(0);
4072 _mm_storel_epi64(&mut r, a);
4073 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4074 }
4075
4076 #[simd_test(enable = "sse2")]
4077 #[cfg_attr(miri, ignore)]
4080 unsafe fn test_mm_stream_si128() {
4081 let a = _mm_setr_epi32(1, 2, 3, 4);
4082 let mut r = _mm_undefined_si128();
4083 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4084 assert_eq_m128i(r, a);
4085 }
4086
4087 #[simd_test(enable = "sse2")]
4088 #[cfg_attr(miri, ignore)]
4091 unsafe fn test_mm_stream_si32() {
4092 let a: i32 = 7;
4093 let mut mem = boxed::Box::<i32>::new(-1);
4094 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4095 assert_eq!(a, *mem);
4096 }
4097
4098 #[simd_test(enable = "sse2")]
4099 unsafe fn test_mm_move_epi64() {
4100 let a = _mm_setr_epi64x(5, 6);
4101 let r = _mm_move_epi64(a);
4102 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4103 }
4104
4105 #[simd_test(enable = "sse2")]
4106 unsafe fn test_mm_packs_epi16() {
4107 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4108 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4109 let r = _mm_packs_epi16(a, b);
4110 #[rustfmt::skip]
4111 assert_eq_m128i(
4112 r,
4113 _mm_setr_epi8(
4114 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4115 )
4116 );
4117 }
4118
4119 #[simd_test(enable = "sse2")]
4120 unsafe fn test_mm_packs_epi32() {
4121 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4122 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4123 let r = _mm_packs_epi32(a, b);
4124 assert_eq_m128i(
4125 r,
4126 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4127 );
4128 }
4129
4130 #[simd_test(enable = "sse2")]
4131 unsafe fn test_mm_packus_epi16() {
4132 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4133 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4134 let r = _mm_packus_epi16(a, b);
4135 assert_eq_m128i(
4136 r,
4137 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4138 );
4139 }
4140
4141 #[simd_test(enable = "sse2")]
4142 unsafe fn test_mm_extract_epi16() {
4143 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4144 let r1 = _mm_extract_epi16::<0>(a);
4145 let r2 = _mm_extract_epi16::<3>(a);
4146 assert_eq!(r1, 0xFFFF);
4147 assert_eq!(r2, 3);
4148 }
4149
4150 #[simd_test(enable = "sse2")]
4151 unsafe fn test_mm_insert_epi16() {
4152 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4153 let r = _mm_insert_epi16::<0>(a, 9);
4154 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4155 assert_eq_m128i(r, e);
4156 }
4157
4158 #[simd_test(enable = "sse2")]
4159 unsafe fn test_mm_movemask_epi8() {
4160 #[rustfmt::skip]
4161 let a = _mm_setr_epi8(
4162 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4163 0b0101, 0b1111_0000u8 as i8, 0, 0,
4164 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4165 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4166 );
4167 let r = _mm_movemask_epi8(a);
4168 assert_eq!(r, 0b10100110_00100101);
4169 }
4170
4171 #[simd_test(enable = "sse2")]
4172 unsafe fn test_mm_shuffle_epi32() {
4173 let a = _mm_setr_epi32(5, 10, 15, 20);
4174 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4175 let e = _mm_setr_epi32(20, 10, 10, 5);
4176 assert_eq_m128i(r, e);
4177 }
4178
4179 #[simd_test(enable = "sse2")]
4180 unsafe fn test_mm_shufflehi_epi16() {
4181 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4182 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4183 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4184 assert_eq_m128i(r, e);
4185 }
4186
4187 #[simd_test(enable = "sse2")]
4188 unsafe fn test_mm_shufflelo_epi16() {
4189 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4190 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4191 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4192 assert_eq_m128i(r, e);
4193 }
4194
4195 #[simd_test(enable = "sse2")]
4196 unsafe fn test_mm_unpackhi_epi8() {
4197 #[rustfmt::skip]
4198 let a = _mm_setr_epi8(
4199 0, 1, 2, 3, 4, 5, 6, 7,
4200 8, 9, 10, 11, 12, 13, 14, 15,
4201 );
4202 #[rustfmt::skip]
4203 let b = _mm_setr_epi8(
4204 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4205 );
4206 let r = _mm_unpackhi_epi8(a, b);
4207 #[rustfmt::skip]
4208 let e = _mm_setr_epi8(
4209 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4210 );
4211 assert_eq_m128i(r, e);
4212 }
4213
4214 #[simd_test(enable = "sse2")]
4215 unsafe fn test_mm_unpackhi_epi16() {
4216 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4217 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4218 let r = _mm_unpackhi_epi16(a, b);
4219 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4220 assert_eq_m128i(r, e);
4221 }
4222
4223 #[simd_test(enable = "sse2")]
4224 unsafe fn test_mm_unpackhi_epi32() {
4225 let a = _mm_setr_epi32(0, 1, 2, 3);
4226 let b = _mm_setr_epi32(4, 5, 6, 7);
4227 let r = _mm_unpackhi_epi32(a, b);
4228 let e = _mm_setr_epi32(2, 6, 3, 7);
4229 assert_eq_m128i(r, e);
4230 }
4231
4232 #[simd_test(enable = "sse2")]
4233 unsafe fn test_mm_unpackhi_epi64() {
4234 let a = _mm_setr_epi64x(0, 1);
4235 let b = _mm_setr_epi64x(2, 3);
4236 let r = _mm_unpackhi_epi64(a, b);
4237 let e = _mm_setr_epi64x(1, 3);
4238 assert_eq_m128i(r, e);
4239 }
4240
4241 #[simd_test(enable = "sse2")]
4242 unsafe fn test_mm_unpacklo_epi8() {
4243 #[rustfmt::skip]
4244 let a = _mm_setr_epi8(
4245 0, 1, 2, 3, 4, 5, 6, 7,
4246 8, 9, 10, 11, 12, 13, 14, 15,
4247 );
4248 #[rustfmt::skip]
4249 let b = _mm_setr_epi8(
4250 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4251 );
4252 let r = _mm_unpacklo_epi8(a, b);
4253 #[rustfmt::skip]
4254 let e = _mm_setr_epi8(
4255 0, 16, 1, 17, 2, 18, 3, 19,
4256 4, 20, 5, 21, 6, 22, 7, 23,
4257 );
4258 assert_eq_m128i(r, e);
4259 }
4260
4261 #[simd_test(enable = "sse2")]
4262 unsafe fn test_mm_unpacklo_epi16() {
4263 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4264 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4265 let r = _mm_unpacklo_epi16(a, b);
4266 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4267 assert_eq_m128i(r, e);
4268 }
4269
4270 #[simd_test(enable = "sse2")]
4271 unsafe fn test_mm_unpacklo_epi32() {
4272 let a = _mm_setr_epi32(0, 1, 2, 3);
4273 let b = _mm_setr_epi32(4, 5, 6, 7);
4274 let r = _mm_unpacklo_epi32(a, b);
4275 let e = _mm_setr_epi32(0, 4, 1, 5);
4276 assert_eq_m128i(r, e);
4277 }
4278
4279 #[simd_test(enable = "sse2")]
4280 unsafe fn test_mm_unpacklo_epi64() {
4281 let a = _mm_setr_epi64x(0, 1);
4282 let b = _mm_setr_epi64x(2, 3);
4283 let r = _mm_unpacklo_epi64(a, b);
4284 let e = _mm_setr_epi64x(0, 2);
4285 assert_eq_m128i(r, e);
4286 }
4287
4288 #[simd_test(enable = "sse2")]
4289 unsafe fn test_mm_add_sd() {
4290 let a = _mm_setr_pd(1.0, 2.0);
4291 let b = _mm_setr_pd(5.0, 10.0);
4292 let r = _mm_add_sd(a, b);
4293 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4294 }
4295
4296 #[simd_test(enable = "sse2")]
4297 unsafe fn test_mm_add_pd() {
4298 let a = _mm_setr_pd(1.0, 2.0);
4299 let b = _mm_setr_pd(5.0, 10.0);
4300 let r = _mm_add_pd(a, b);
4301 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4302 }
4303
4304 #[simd_test(enable = "sse2")]
4305 unsafe fn test_mm_div_sd() {
4306 let a = _mm_setr_pd(1.0, 2.0);
4307 let b = _mm_setr_pd(5.0, 10.0);
4308 let r = _mm_div_sd(a, b);
4309 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4310 }
4311
4312 #[simd_test(enable = "sse2")]
4313 unsafe fn test_mm_div_pd() {
4314 let a = _mm_setr_pd(1.0, 2.0);
4315 let b = _mm_setr_pd(5.0, 10.0);
4316 let r = _mm_div_pd(a, b);
4317 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4318 }
4319
4320 #[simd_test(enable = "sse2")]
4321 unsafe fn test_mm_max_sd() {
4322 let a = _mm_setr_pd(1.0, 2.0);
4323 let b = _mm_setr_pd(5.0, 10.0);
4324 let r = _mm_max_sd(a, b);
4325 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4326 }
4327
4328 #[simd_test(enable = "sse2")]
4329 unsafe fn test_mm_max_pd() {
4330 let a = _mm_setr_pd(1.0, 2.0);
4331 let b = _mm_setr_pd(5.0, 10.0);
4332 let r = _mm_max_pd(a, b);
4333 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4334
4335 let a = _mm_setr_pd(-0.0, 0.0);
4337 let b = _mm_setr_pd(0.0, 0.0);
4338 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4339 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4340 let a: [u8; 16] = transmute(a);
4341 let b: [u8; 16] = transmute(b);
4342 assert_eq!(r1, b);
4343 assert_eq!(r2, a);
4344 assert_ne!(a, b); }
4346
4347 #[simd_test(enable = "sse2")]
4348 unsafe fn test_mm_min_sd() {
4349 let a = _mm_setr_pd(1.0, 2.0);
4350 let b = _mm_setr_pd(5.0, 10.0);
4351 let r = _mm_min_sd(a, b);
4352 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4353 }
4354
4355 #[simd_test(enable = "sse2")]
4356 unsafe fn test_mm_min_pd() {
4357 let a = _mm_setr_pd(1.0, 2.0);
4358 let b = _mm_setr_pd(5.0, 10.0);
4359 let r = _mm_min_pd(a, b);
4360 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4361
4362 let a = _mm_setr_pd(-0.0, 0.0);
4364 let b = _mm_setr_pd(0.0, 0.0);
4365 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4366 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4367 let a: [u8; 16] = transmute(a);
4368 let b: [u8; 16] = transmute(b);
4369 assert_eq!(r1, b);
4370 assert_eq!(r2, a);
4371 assert_ne!(a, b); }
4373
4374 #[simd_test(enable = "sse2")]
4375 unsafe fn test_mm_mul_sd() {
4376 let a = _mm_setr_pd(1.0, 2.0);
4377 let b = _mm_setr_pd(5.0, 10.0);
4378 let r = _mm_mul_sd(a, b);
4379 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4380 }
4381
4382 #[simd_test(enable = "sse2")]
4383 unsafe fn test_mm_mul_pd() {
4384 let a = _mm_setr_pd(1.0, 2.0);
4385 let b = _mm_setr_pd(5.0, 10.0);
4386 let r = _mm_mul_pd(a, b);
4387 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4388 }
4389
4390 #[simd_test(enable = "sse2")]
4391 unsafe fn test_mm_sqrt_sd() {
4392 let a = _mm_setr_pd(1.0, 2.0);
4393 let b = _mm_setr_pd(5.0, 10.0);
4394 let r = _mm_sqrt_sd(a, b);
4395 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4396 }
4397
4398 #[simd_test(enable = "sse2")]
4399 unsafe fn test_mm_sqrt_pd() {
4400 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4401 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4402 }
4403
4404 #[simd_test(enable = "sse2")]
4405 unsafe fn test_mm_sub_sd() {
4406 let a = _mm_setr_pd(1.0, 2.0);
4407 let b = _mm_setr_pd(5.0, 10.0);
4408 let r = _mm_sub_sd(a, b);
4409 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4410 }
4411
4412 #[simd_test(enable = "sse2")]
4413 unsafe fn test_mm_sub_pd() {
4414 let a = _mm_setr_pd(1.0, 2.0);
4415 let b = _mm_setr_pd(5.0, 10.0);
4416 let r = _mm_sub_pd(a, b);
4417 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4418 }
4419
4420 #[simd_test(enable = "sse2")]
4421 unsafe fn test_mm_and_pd() {
4422 let a = transmute(u64x2::splat(5));
4423 let b = transmute(u64x2::splat(3));
4424 let r = _mm_and_pd(a, b);
4425 let e = transmute(u64x2::splat(1));
4426 assert_eq_m128d(r, e);
4427 }
4428
4429 #[simd_test(enable = "sse2")]
4430 unsafe fn test_mm_andnot_pd() {
4431 let a = transmute(u64x2::splat(5));
4432 let b = transmute(u64x2::splat(3));
4433 let r = _mm_andnot_pd(a, b);
4434 let e = transmute(u64x2::splat(2));
4435 assert_eq_m128d(r, e);
4436 }
4437
4438 #[simd_test(enable = "sse2")]
4439 unsafe fn test_mm_or_pd() {
4440 let a = transmute(u64x2::splat(5));
4441 let b = transmute(u64x2::splat(3));
4442 let r = _mm_or_pd(a, b);
4443 let e = transmute(u64x2::splat(7));
4444 assert_eq_m128d(r, e);
4445 }
4446
4447 #[simd_test(enable = "sse2")]
4448 unsafe fn test_mm_xor_pd() {
4449 let a = transmute(u64x2::splat(5));
4450 let b = transmute(u64x2::splat(3));
4451 let r = _mm_xor_pd(a, b);
4452 let e = transmute(u64x2::splat(6));
4453 assert_eq_m128d(r, e);
4454 }
4455
4456 #[simd_test(enable = "sse2")]
4457 unsafe fn test_mm_cmpeq_sd() {
4458 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4459 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4460 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4461 assert_eq_m128i(r, e);
4462 }
4463
4464 #[simd_test(enable = "sse2")]
4465 unsafe fn test_mm_cmplt_sd() {
4466 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4467 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4468 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4469 assert_eq_m128i(r, e);
4470 }
4471
4472 #[simd_test(enable = "sse2")]
4473 unsafe fn test_mm_cmple_sd() {
4474 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4475 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4476 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4477 assert_eq_m128i(r, e);
4478 }
4479
4480 #[simd_test(enable = "sse2")]
4481 unsafe fn test_mm_cmpgt_sd() {
4482 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4483 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4484 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4485 assert_eq_m128i(r, e);
4486 }
4487
4488 #[simd_test(enable = "sse2")]
4489 unsafe fn test_mm_cmpge_sd() {
4490 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4491 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4492 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4493 assert_eq_m128i(r, e);
4494 }
4495
4496 #[simd_test(enable = "sse2")]
4497 unsafe fn test_mm_cmpord_sd() {
4498 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4499 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4500 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4501 assert_eq_m128i(r, e);
4502 }
4503
4504 #[simd_test(enable = "sse2")]
4505 unsafe fn test_mm_cmpunord_sd() {
4506 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4507 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4508 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4509 assert_eq_m128i(r, e);
4510 }
4511
4512 #[simd_test(enable = "sse2")]
4513 unsafe fn test_mm_cmpneq_sd() {
4514 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4515 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4516 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4517 assert_eq_m128i(r, e);
4518 }
4519
4520 #[simd_test(enable = "sse2")]
4521 unsafe fn test_mm_cmpnlt_sd() {
4522 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4523 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4524 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4525 assert_eq_m128i(r, e);
4526 }
4527
4528 #[simd_test(enable = "sse2")]
4529 unsafe fn test_mm_cmpnle_sd() {
4530 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4531 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4532 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4533 assert_eq_m128i(r, e);
4534 }
4535
4536 #[simd_test(enable = "sse2")]
4537 unsafe fn test_mm_cmpngt_sd() {
4538 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4539 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4540 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4541 assert_eq_m128i(r, e);
4542 }
4543
4544 #[simd_test(enable = "sse2")]
4545 unsafe fn test_mm_cmpnge_sd() {
4546 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4547 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4548 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4549 assert_eq_m128i(r, e);
4550 }
4551
4552 #[simd_test(enable = "sse2")]
4553 unsafe fn test_mm_cmpeq_pd() {
4554 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4555 let e = _mm_setr_epi64x(!0, 0);
4556 let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4557 assert_eq_m128i(r, e);
4558 }
4559
4560 #[simd_test(enable = "sse2")]
4561 unsafe fn test_mm_cmplt_pd() {
4562 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4563 let e = _mm_setr_epi64x(0, !0);
4564 let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4565 assert_eq_m128i(r, e);
4566 }
4567
4568 #[simd_test(enable = "sse2")]
4569 unsafe fn test_mm_cmple_pd() {
4570 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4571 let e = _mm_setr_epi64x(!0, !0);
4572 let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4573 assert_eq_m128i(r, e);
4574 }
4575
4576 #[simd_test(enable = "sse2")]
4577 unsafe fn test_mm_cmpgt_pd() {
4578 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4579 let e = _mm_setr_epi64x(0, 0);
4580 let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4581 assert_eq_m128i(r, e);
4582 }
4583
4584 #[simd_test(enable = "sse2")]
4585 unsafe fn test_mm_cmpge_pd() {
4586 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4587 let e = _mm_setr_epi64x(!0, 0);
4588 let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4589 assert_eq_m128i(r, e);
4590 }
4591
4592 #[simd_test(enable = "sse2")]
4593 unsafe fn test_mm_cmpord_pd() {
4594 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4595 let e = _mm_setr_epi64x(0, !0);
4596 let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4597 assert_eq_m128i(r, e);
4598 }
4599
4600 #[simd_test(enable = "sse2")]
4601 unsafe fn test_mm_cmpunord_pd() {
4602 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4603 let e = _mm_setr_epi64x(!0, 0);
4604 let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4605 assert_eq_m128i(r, e);
4606 }
4607
4608 #[simd_test(enable = "sse2")]
4609 unsafe fn test_mm_cmpneq_pd() {
4610 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4611 let e = _mm_setr_epi64x(!0, !0);
4612 let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4613 assert_eq_m128i(r, e);
4614 }
4615
4616 #[simd_test(enable = "sse2")]
4617 unsafe fn test_mm_cmpnlt_pd() {
4618 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4619 let e = _mm_setr_epi64x(0, 0);
4620 let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4621 assert_eq_m128i(r, e);
4622 }
4623
4624 #[simd_test(enable = "sse2")]
4625 unsafe fn test_mm_cmpnle_pd() {
4626 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4627 let e = _mm_setr_epi64x(0, 0);
4628 let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4629 assert_eq_m128i(r, e);
4630 }
4631
4632 #[simd_test(enable = "sse2")]
4633 unsafe fn test_mm_cmpngt_pd() {
4634 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4635 let e = _mm_setr_epi64x(0, !0);
4636 let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4637 assert_eq_m128i(r, e);
4638 }
4639
4640 #[simd_test(enable = "sse2")]
4641 unsafe fn test_mm_cmpnge_pd() {
4642 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4643 let e = _mm_setr_epi64x(0, !0);
4644 let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4645 assert_eq_m128i(r, e);
4646 }
4647
4648 #[simd_test(enable = "sse2")]
4649 unsafe fn test_mm_comieq_sd() {
4650 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4651 assert!(_mm_comieq_sd(a, b) != 0);
4652
4653 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4654 assert!(_mm_comieq_sd(a, b) == 0);
4655 }
4656
4657 #[simd_test(enable = "sse2")]
4658 unsafe fn test_mm_comilt_sd() {
4659 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4660 assert!(_mm_comilt_sd(a, b) == 0);
4661 }
4662
4663 #[simd_test(enable = "sse2")]
4664 unsafe fn test_mm_comile_sd() {
4665 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4666 assert!(_mm_comile_sd(a, b) != 0);
4667 }
4668
4669 #[simd_test(enable = "sse2")]
4670 unsafe fn test_mm_comigt_sd() {
4671 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4672 assert!(_mm_comigt_sd(a, b) == 0);
4673 }
4674
4675 #[simd_test(enable = "sse2")]
4676 unsafe fn test_mm_comige_sd() {
4677 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4678 assert!(_mm_comige_sd(a, b) != 0);
4679 }
4680
4681 #[simd_test(enable = "sse2")]
4682 unsafe fn test_mm_comineq_sd() {
4683 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4684 assert!(_mm_comineq_sd(a, b) == 0);
4685 }
4686
4687 #[simd_test(enable = "sse2")]
4688 unsafe fn test_mm_ucomieq_sd() {
4689 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4690 assert!(_mm_ucomieq_sd(a, b) != 0);
4691
4692 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4693 assert!(_mm_ucomieq_sd(a, b) == 0);
4694 }
4695
4696 #[simd_test(enable = "sse2")]
4697 unsafe fn test_mm_ucomilt_sd() {
4698 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4699 assert!(_mm_ucomilt_sd(a, b) == 0);
4700 }
4701
4702 #[simd_test(enable = "sse2")]
4703 unsafe fn test_mm_ucomile_sd() {
4704 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4705 assert!(_mm_ucomile_sd(a, b) != 0);
4706 }
4707
4708 #[simd_test(enable = "sse2")]
4709 unsafe fn test_mm_ucomigt_sd() {
4710 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4711 assert!(_mm_ucomigt_sd(a, b) == 0);
4712 }
4713
4714 #[simd_test(enable = "sse2")]
4715 unsafe fn test_mm_ucomige_sd() {
4716 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4717 assert!(_mm_ucomige_sd(a, b) != 0);
4718 }
4719
4720 #[simd_test(enable = "sse2")]
4721 unsafe fn test_mm_ucomineq_sd() {
4722 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4723 assert!(_mm_ucomineq_sd(a, b) == 0);
4724 }
4725
4726 #[simd_test(enable = "sse2")]
4727 unsafe fn test_mm_movemask_pd() {
4728 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4729 assert_eq!(r, 0b01);
4730
4731 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4732 assert_eq!(r, 0b11);
4733 }
4734
    // Four `f64`s forced to 16-byte alignment; used as the source/destination
    // buffer by the load/store tests below.
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }
4739
4740 #[simd_test(enable = "sse2")]
4741 unsafe fn test_mm_load_pd() {
4742 let mem = Memory {
4743 data: [1.0f64, 2.0, 3.0, 4.0],
4744 };
4745 let vals = &mem.data;
4746 let d = vals.as_ptr();
4747
4748 let r = _mm_load_pd(d);
4749 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4750 }
4751
4752 #[simd_test(enable = "sse2")]
4753 unsafe fn test_mm_load_sd() {
4754 let a = 1.;
4755 let expected = _mm_setr_pd(a, 0.);
4756 let r = _mm_load_sd(&a);
4757 assert_eq_m128d(r, expected);
4758 }
4759
4760 #[simd_test(enable = "sse2")]
4761 unsafe fn test_mm_loadh_pd() {
4762 let a = _mm_setr_pd(1., 2.);
4763 let b = 3.;
4764 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4765 let r = _mm_loadh_pd(a, &b);
4766 assert_eq_m128d(r, expected);
4767 }
4768
4769 #[simd_test(enable = "sse2")]
4770 unsafe fn test_mm_loadl_pd() {
4771 let a = _mm_setr_pd(1., 2.);
4772 let b = 3.;
4773 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4774 let r = _mm_loadl_pd(a, &b);
4775 assert_eq_m128d(r, expected);
4776 }
4777
4778 #[simd_test(enable = "sse2")]
4779 #[cfg_attr(miri, ignore)]
4782 unsafe fn test_mm_stream_pd() {
4783 #[repr(align(128))]
4784 struct Memory {
4785 pub data: [f64; 2],
4786 }
4787 let a = _mm_set1_pd(7.0);
4788 let mut mem = Memory { data: [-1.0; 2] };
4789
4790 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4791 for i in 0..2 {
4792 assert_eq!(mem.data[i], get_m128d(a, i));
4793 }
4794 }
4795
4796 #[simd_test(enable = "sse2")]
4797 unsafe fn test_mm_store_sd() {
4798 let mut dest = 0.;
4799 let a = _mm_setr_pd(1., 2.);
4800 _mm_store_sd(&mut dest, a);
4801 assert_eq!(dest, _mm_cvtsd_f64(a));
4802 }
4803
4804 #[simd_test(enable = "sse2")]
4805 unsafe fn test_mm_store_pd() {
4806 let mut mem = Memory { data: [0.0f64; 4] };
4807 let vals = &mut mem.data;
4808 let a = _mm_setr_pd(1.0, 2.0);
4809 let d = vals.as_mut_ptr();
4810
4811 _mm_store_pd(d, *black_box(&a));
4812 assert_eq!(vals[0], 1.0);
4813 assert_eq!(vals[1], 2.0);
4814 }
4815
4816 #[simd_test(enable = "sse2")]
4817 unsafe fn test_mm_storeu_pd() {
4818 let mut mem = Memory { data: [0.0f64; 4] };
4819 let vals = &mut mem.data;
4820 let a = _mm_setr_pd(1.0, 2.0);
4821
4822 let mut ofs = 0;
4823 let mut p = vals.as_mut_ptr();
4824
4825 if (p as usize) & 0xf == 0 {
4827 ofs = 1;
4828 p = p.add(1);
4829 }
4830
4831 _mm_storeu_pd(p, *black_box(&a));
4832
4833 if ofs > 0 {
4834 assert_eq!(vals[ofs - 1], 0.0);
4835 }
4836 assert_eq!(vals[ofs + 0], 1.0);
4837 assert_eq!(vals[ofs + 1], 2.0);
4838 }
4839
4840 #[simd_test(enable = "sse2")]
4841 unsafe fn test_mm_storeu_si16() {
4842 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4843 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
4844 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
4845 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
4846 assert_eq_m128i(r, e);
4847 }
4848
4849 #[simd_test(enable = "sse2")]
4850 unsafe fn test_mm_storeu_si32() {
4851 let a = _mm_setr_epi32(1, 2, 3, 4);
4852 let mut r = _mm_setr_epi32(5, 6, 7, 8);
4853 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
4854 let e = _mm_setr_epi32(1, 6, 7, 8);
4855 assert_eq_m128i(r, e);
4856 }
4857
4858 #[simd_test(enable = "sse2")]
4859 unsafe fn test_mm_storeu_si64() {
4860 let a = _mm_setr_epi64x(1, 2);
4861 let mut r = _mm_setr_epi64x(3, 4);
4862 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
4863 let e = _mm_setr_epi64x(1, 4);
4864 assert_eq_m128i(r, e);
4865 }
4866
4867 #[simd_test(enable = "sse2")]
4868 unsafe fn test_mm_store1_pd() {
4869 let mut mem = Memory { data: [0.0f64; 4] };
4870 let vals = &mut mem.data;
4871 let a = _mm_setr_pd(1.0, 2.0);
4872 let d = vals.as_mut_ptr();
4873
4874 _mm_store1_pd(d, *black_box(&a));
4875 assert_eq!(vals[0], 1.0);
4876 assert_eq!(vals[1], 1.0);
4877 }
4878
4879 #[simd_test(enable = "sse2")]
4880 unsafe fn test_mm_store_pd1() {
4881 let mut mem = Memory { data: [0.0f64; 4] };
4882 let vals = &mut mem.data;
4883 let a = _mm_setr_pd(1.0, 2.0);
4884 let d = vals.as_mut_ptr();
4885
4886 _mm_store_pd1(d, *black_box(&a));
4887 assert_eq!(vals[0], 1.0);
4888 assert_eq!(vals[1], 1.0);
4889 }
4890
4891 #[simd_test(enable = "sse2")]
4892 unsafe fn test_mm_storer_pd() {
4893 let mut mem = Memory { data: [0.0f64; 4] };
4894 let vals = &mut mem.data;
4895 let a = _mm_setr_pd(1.0, 2.0);
4896 let d = vals.as_mut_ptr();
4897
4898 _mm_storer_pd(d, *black_box(&a));
4899 assert_eq!(vals[0], 2.0);
4900 assert_eq!(vals[1], 1.0);
4901 }
4902
4903 #[simd_test(enable = "sse2")]
4904 unsafe fn test_mm_storeh_pd() {
4905 let mut dest = 0.;
4906 let a = _mm_setr_pd(1., 2.);
4907 _mm_storeh_pd(&mut dest, a);
4908 assert_eq!(dest, get_m128d(a, 1));
4909 }
4910
4911 #[simd_test(enable = "sse2")]
4912 unsafe fn test_mm_storel_pd() {
4913 let mut dest = 0.;
4914 let a = _mm_setr_pd(1., 2.);
4915 _mm_storel_pd(&mut dest, a);
4916 assert_eq!(dest, _mm_cvtsd_f64(a));
4917 }
4918
4919 #[simd_test(enable = "sse2")]
4920 unsafe fn test_mm_loadr_pd() {
4921 let mut mem = Memory {
4922 data: [1.0f64, 2.0, 3.0, 4.0],
4923 };
4924 let vals = &mut mem.data;
4925 let d = vals.as_ptr();
4926
4927 let r = _mm_loadr_pd(d);
4928 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
4929 }
4930
4931 #[simd_test(enable = "sse2")]
4932 unsafe fn test_mm_loadu_pd() {
4933 let mut mem = Memory {
4934 data: [1.0f64, 2.0, 3.0, 4.0],
4935 };
4936 let vals = &mut mem.data;
4937 let mut d = vals.as_ptr();
4938
4939 let mut offset = 0;
4941 if (d as usize) & 0xf == 0 {
4942 offset = 1;
4943 d = d.add(offset);
4944 }
4945
4946 let r = _mm_loadu_pd(d);
4947 let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
4948 assert_eq_m128d(r, e);
4949 }
4950
4951 #[simd_test(enable = "sse2")]
4952 unsafe fn test_mm_loadu_si16() {
4953 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4954 let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
4955 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
4956 }
4957
4958 #[simd_test(enable = "sse2")]
4959 unsafe fn test_mm_loadu_si32() {
4960 let a = _mm_setr_epi32(1, 2, 3, 4);
4961 let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
4962 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
4963 }
4964
4965 #[simd_test(enable = "sse2")]
4966 unsafe fn test_mm_loadu_si64() {
4967 let a = _mm_setr_epi64x(5, 6);
4968 let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
4969 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4970 }
4971
4972 #[simd_test(enable = "sse2")]
4973 unsafe fn test_mm_cvtpd_ps() {
4974 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
4975 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
4976
4977 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
4978 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
4979
4980 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
4981 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
4982
4983 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
4984 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
4985 }
4986
4987 #[simd_test(enable = "sse2")]
4988 unsafe fn test_mm_cvtps_pd() {
4989 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
4990 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
4991
4992 let r = _mm_cvtps_pd(_mm_setr_ps(
4993 f32::MAX,
4994 f32::INFINITY,
4995 f32::NEG_INFINITY,
4996 f32::MIN,
4997 ));
4998 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
4999 }
5000
5001 #[simd_test(enable = "sse2")]
5002 unsafe fn test_mm_cvtpd_epi32() {
5003 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5004 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5005
5006 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5007 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5008
5009 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5010 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5011
5012 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5013 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5014
5015 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5016 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5017 }
5018
5019 #[simd_test(enable = "sse2")]
5020 unsafe fn test_mm_cvtsd_si32() {
5021 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5022 assert_eq!(r, -2);
5023
5024 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5025 assert_eq!(r, i32::MIN);
5026
5027 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5028 assert_eq!(r, i32::MIN);
5029 }
5030
5031 #[simd_test(enable = "sse2")]
5032 unsafe fn test_mm_cvtsd_ss() {
5033 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5034 let b = _mm_setr_pd(2.0, -5.0);
5035
5036 let r = _mm_cvtsd_ss(a, b);
5037
5038 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5039
5040 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5041 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5042
5043 let r = _mm_cvtsd_ss(a, b);
5044
5045 assert_eq_m128(
5046 r,
5047 _mm_setr_ps(
5048 f32::INFINITY,
5049 f32::NEG_INFINITY,
5050 f32::MAX,
5051 f32::NEG_INFINITY,
5052 ),
5053 );
5054 }
5055
5056 #[simd_test(enable = "sse2")]
5057 unsafe fn test_mm_cvtsd_f64() {
5058 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5059 assert_eq!(r, -1.1);
5060 }
5061
5062 #[simd_test(enable = "sse2")]
5063 unsafe fn test_mm_cvtss_sd() {
5064 let a = _mm_setr_pd(-1.1, 2.2);
5065 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5066
5067 let r = _mm_cvtss_sd(a, b);
5068 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5069
5070 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5071 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5072
5073 let r = _mm_cvtss_sd(a, b);
5074 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5075 }
5076
5077 #[simd_test(enable = "sse2")]
5078 unsafe fn test_mm_cvttpd_epi32() {
5079 let a = _mm_setr_pd(-1.1, 2.2);
5080 let r = _mm_cvttpd_epi32(a);
5081 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5082
5083 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5084 let r = _mm_cvttpd_epi32(a);
5085 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5086 }
5087
5088 #[simd_test(enable = "sse2")]
5089 unsafe fn test_mm_cvttsd_si32() {
5090 let a = _mm_setr_pd(-1.1, 2.2);
5091 let r = _mm_cvttsd_si32(a);
5092 assert_eq!(r, -1);
5093
5094 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5095 let r = _mm_cvttsd_si32(a);
5096 assert_eq!(r, i32::MIN);
5097 }
5098
5099 #[simd_test(enable = "sse2")]
5100 unsafe fn test_mm_cvttps_epi32() {
5101 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5102 let r = _mm_cvttps_epi32(a);
5103 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5104
5105 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5106 let r = _mm_cvttps_epi32(a);
5107 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5108 }
5109
5110 #[simd_test(enable = "sse2")]
5111 unsafe fn test_mm_set_sd() {
5112 let r = _mm_set_sd(-1.0_f64);
5113 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5114 }
5115
5116 #[simd_test(enable = "sse2")]
5117 unsafe fn test_mm_set1_pd() {
5118 let r = _mm_set1_pd(-1.0_f64);
5119 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5120 }
5121
5122 #[simd_test(enable = "sse2")]
5123 unsafe fn test_mm_set_pd1() {
5124 let r = _mm_set_pd1(-2.0_f64);
5125 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5126 }
5127
5128 #[simd_test(enable = "sse2")]
5129 unsafe fn test_mm_set_pd() {
5130 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5131 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5132 }
5133
5134 #[simd_test(enable = "sse2")]
5135 unsafe fn test_mm_setr_pd() {
5136 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5137 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5138 }
5139
5140 #[simd_test(enable = "sse2")]
5141 unsafe fn test_mm_setzero_pd() {
5142 let r = _mm_setzero_pd();
5143 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5144 }
5145
5146 #[simd_test(enable = "sse2")]
5147 unsafe fn test_mm_load1_pd() {
5148 let d = -5.0;
5149 let r = _mm_load1_pd(&d);
5150 assert_eq_m128d(r, _mm_setr_pd(d, d));
5151 }
5152
5153 #[simd_test(enable = "sse2")]
5154 unsafe fn test_mm_load_pd1() {
5155 let d = -5.0;
5156 let r = _mm_load_pd1(&d);
5157 assert_eq_m128d(r, _mm_setr_pd(d, d));
5158 }
5159
5160 #[simd_test(enable = "sse2")]
5161 unsafe fn test_mm_unpackhi_pd() {
5162 let a = _mm_setr_pd(1.0, 2.0);
5163 let b = _mm_setr_pd(3.0, 4.0);
5164 let r = _mm_unpackhi_pd(a, b);
5165 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5166 }
5167
5168 #[simd_test(enable = "sse2")]
5169 unsafe fn test_mm_unpacklo_pd() {
5170 let a = _mm_setr_pd(1.0, 2.0);
5171 let b = _mm_setr_pd(3.0, 4.0);
5172 let r = _mm_unpacklo_pd(a, b);
5173 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5174 }
5175
5176 #[simd_test(enable = "sse2")]
5177 unsafe fn test_mm_shuffle_pd() {
5178 let a = _mm_setr_pd(1., 2.);
5179 let b = _mm_setr_pd(3., 4.);
5180 let expected = _mm_setr_pd(1., 3.);
5181 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5182 assert_eq_m128d(r, expected);
5183 }
5184
5185 #[simd_test(enable = "sse2")]
5186 unsafe fn test_mm_move_sd() {
5187 let a = _mm_setr_pd(1., 2.);
5188 let b = _mm_setr_pd(3., 4.);
5189 let expected = _mm_setr_pd(3., 2.);
5190 let r = _mm_move_sd(a, b);
5191 assert_eq_m128d(r, expected);
5192 }
5193
5194 #[simd_test(enable = "sse2")]
5195 unsafe fn test_mm_castpd_ps() {
5196 let a = _mm_set1_pd(0.);
5197 let expected = _mm_set1_ps(0.);
5198 let r = _mm_castpd_ps(a);
5199 assert_eq_m128(r, expected);
5200 }
5201
5202 #[simd_test(enable = "sse2")]
5203 unsafe fn test_mm_castpd_si128() {
5204 let a = _mm_set1_pd(0.);
5205 let expected = _mm_set1_epi64x(0);
5206 let r = _mm_castpd_si128(a);
5207 assert_eq_m128i(r, expected);
5208 }
5209
5210 #[simd_test(enable = "sse2")]
5211 unsafe fn test_mm_castps_pd() {
5212 let a = _mm_set1_ps(0.);
5213 let expected = _mm_set1_pd(0.);
5214 let r = _mm_castps_pd(a);
5215 assert_eq_m128d(r, expected);
5216 }
5217
5218 #[simd_test(enable = "sse2")]
5219 unsafe fn test_mm_castps_si128() {
5220 let a = _mm_set1_ps(0.);
5221 let expected = _mm_set1_epi32(0);
5222 let r = _mm_castps_si128(a);
5223 assert_eq_m128i(r, expected);
5224 }
5225
5226 #[simd_test(enable = "sse2")]
5227 unsafe fn test_mm_castsi128_pd() {
5228 let a = _mm_set1_epi64x(0);
5229 let expected = _mm_set1_pd(0.);
5230 let r = _mm_castsi128_pd(a);
5231 assert_eq_m128d(r, expected);
5232 }
5233
5234 #[simd_test(enable = "sse2")]
5235 unsafe fn test_mm_castsi128_ps() {
5236 let a = _mm_set1_epi32(0);
5237 let expected = _mm_set1_ps(0.);
5238 let r = _mm_castsi128_ps(a);
5239 assert_eq_m128(r, expected);
5240 }
5241}