@@ -18,48 +18,46 @@ macro_rules! p {
18
18
}
19
19
20
20
pub fn find < ' tcx > ( _tcx : & ty:: ctxt < ' tcx > , name : & str ) -> Option < Intrinsic > {
21
- Some ( match name {
22
- "mm_movemask_ps" => p ! ( "sse.movmsk.ps" , ( f32x4) -> i32 ) ,
23
- "mm_movemask_pd" => p ! ( "sse2.movmsk.pd" , ( f64x2) -> i32 ) ,
24
- "mm_movemask_epi8" => p ! ( "sse2.pmovmskb.128" , ( i8x16) -> i32 ) ,
25
-
26
- "mm_rsqrt_ps" => p ! ( "sse.rsqrt.ps" , ( f32x4) -> f32x4) ,
27
- "mm_rcp_ps" => p ! ( "sse.rcp.ps" , ( f32x4) -> f32x4) ,
28
-
29
- "mm_sqrt_ps" => plain ! ( "llvm.sqrt.v4f32" , ( f32x4) -> f32x4) ,
30
- "mm_sqrt_pd" => plain ! ( "llvm.sqrt.v2f64" , ( f64x2) -> f64x2) ,
31
-
32
- "mm_max_ps" => p ! ( "sse.max.ps" , ( f32x4, f32x4) -> f32x4) ,
33
- "mm_max_pd" => p ! ( "sse2.max.pd" , ( f64x2, f64x2) -> f64x2) ,
34
-
35
- "mm_min_ps" => p ! ( "sse.min.ps" , ( f32x4, f32x4) -> f32x4) ,
36
- "mm_min_pd" => p ! ( "sse2.min.pd" , ( f64x2, f64x2) -> f64x2) ,
37
-
38
- "mm_shuffle_epi8" => p ! ( "ssse3.pshuf.b.128" , ( i8x16, i8x16) -> i8x16) ,
39
-
40
- "mm_adds_epi16" => p ! ( "sse2.padds.w" , ( i16x8, i16x8) -> i16x8) ,
41
- "mm_adds_epi8" => p ! ( "sse2.padds.b" , ( i8x16, i8x16) -> i8x16) ,
42
- "mm_adds_epu16" => p ! ( "sse2.paddus.w" , ( i16x8, i16x8) -> i16x8) ,
43
- "mm_adds_epu8" => p ! ( "sse2.paddus.b" , ( i8x16, i8x16) -> i8x16) ,
44
- "mm_avg_epu16" => p ! ( "sse2.pavg.w" , ( i16x8, i16x8) -> i16x8) ,
45
- "mm_avg_epu8" => p ! ( "sse2.pavg.b" , ( i8x16, i8x16) -> i8x16) ,
46
- "mm_madd_epi16" => p ! ( "sse2.pmadd.wd" , ( i16x8, i16x8) -> i32x4) ,
47
- "mm_max_epi16" => p ! ( "sse2.pmaxs.w" , ( i16x8, i16x8) -> i16x8) ,
48
- "mm_max_epu8" => p ! ( "sse2.pmaxu.b" , ( i8x16, i8x16) -> i8x16) ,
49
- "mm_min_epi16" => p ! ( "sse2.pmins.w" , ( i16x8, i16x8) -> i16x8) ,
50
- "mm_min_epu8" => p ! ( "sse2.pminu.b" , ( i8x16, i8x16) -> i8x16) ,
51
- "mm_mul_epu32" => p ! ( "sse2.pmulu.dq" , ( i32x4, i32x4) -> i64x2) ,
52
- "mm_mulhi_epi16" => p ! ( "sse2.pmulh.w" , ( i8x16, i8x16) -> i8x16) ,
53
- "mm_mulhi_epu16" => p ! ( "sse2.pmulhu.w" , ( i8x16, i8x16) -> i8x16) ,
54
- "mm_packs_epi16" => p ! ( "sse2.packsswb.128" , ( i16x8, i16x8) -> i8x16) ,
55
- "mm_packs_epi32" => p ! ( "sse2.packssdw.128" , ( i32x4, i32x4) -> i16x8) ,
56
- "mm_packus_epi16" => p ! ( "sse2.packuswb.128" , ( i16x8, i16x8) -> i8x16) ,
57
- "mm_sad_epu8" => p ! ( "sse2.psad.bw" , ( i8x16, i8x16) -> i64x2) ,
58
- "mm_subs_epi16" => p ! ( "sse2.psubs.w" , ( i16x8, i16x8) -> i16x8) ,
59
- "mm_subs_epi8" => p ! ( "sse2.psubs.b" , ( i8x16, i8x16) -> i8x16) ,
60
- "mm_subs_epu16" => p ! ( "sse2.psubus.w" , ( i16x8, i16x8) -> i16x8) ,
61
- "mm_subs_epu8" => p ! ( "sse2.psubus.b" , ( i8x16, i8x16) -> i8x16) ,
62
-
21
+ if !name. starts_with ( "mm_" ) { return None }
22
+
23
+ Some ( match & name[ "mm_" . len ( ) ..] {
24
+ "sqrt_ps" => plain ! ( "llvm.sqrt.v4f32" , ( f32x4) -> f32x4) ,
25
+ "sqrt_pd" => plain ! ( "llvm.sqrt.v2f64" , ( f64x2) -> f64x2) ,
26
+
27
+ "movemask_ps" => p ! ( "sse.movmsk.ps" , ( f32x4) -> i32 ) ,
28
+ "max_ps" => p ! ( "sse.max.ps" , ( f32x4, f32x4) -> f32x4) ,
29
+ "min_ps" => p ! ( "sse.min.ps" , ( f32x4, f32x4) -> f32x4) ,
30
+ "rsqrt_ps" => p ! ( "sse.rsqrt.ps" , ( f32x4) -> f32x4) ,
31
+ "rcp_ps" => p ! ( "sse.rcp.ps" , ( f32x4) -> f32x4) ,
32
+
33
+ "adds_epi16" => p ! ( "sse2.padds.w" , ( i16x8, i16x8) -> i16x8) ,
34
+ "adds_epi8" => p ! ( "sse2.padds.b" , ( i8x16, i8x16) -> i8x16) ,
35
+ "adds_epu16" => p ! ( "sse2.paddus.w" , ( i16x8, i16x8) -> i16x8) ,
36
+ "adds_epu8" => p ! ( "sse2.paddus.b" , ( i8x16, i8x16) -> i8x16) ,
37
+ "avg_epu16" => p ! ( "sse2.pavg.w" , ( i16x8, i16x8) -> i16x8) ,
38
+ "avg_epu8" => p ! ( "sse2.pavg.b" , ( i8x16, i8x16) -> i8x16) ,
39
+ "madd_epi16" => p ! ( "sse2.pmadd.wd" , ( i16x8, i16x8) -> i32x4) ,
40
+ "max_epi16" => p ! ( "sse2.pmaxs.w" , ( i16x8, i16x8) -> i16x8) ,
41
+ "max_epu8" => p ! ( "sse2.pmaxu.b" , ( i8x16, i8x16) -> i8x16) ,
42
+ "max_pd" => p ! ( "sse2.max.pd" , ( f64x2, f64x2) -> f64x2) ,
43
+ "min_epi16" => p ! ( "sse2.pmins.w" , ( i16x8, i16x8) -> i16x8) ,
44
+ "min_epu8" => p ! ( "sse2.pminu.b" , ( i8x16, i8x16) -> i8x16) ,
45
+ "min_pd" => p ! ( "sse2.min.pd" , ( f64x2, f64x2) -> f64x2) ,
46
+ "movemask_pd" => p ! ( "sse2.movmsk.pd" , ( f64x2) -> i32 ) ,
47
+ "movemask_epi8" => p ! ( "sse2.pmovmskb.128" , ( i8x16) -> i32 ) ,
48
+ "mul_epu32" => p ! ( "sse2.pmulu.dq" , ( i32x4, i32x4) -> i64x2) ,
49
+ "mulhi_epi16" => p ! ( "sse2.pmulh.w" , ( i8x16, i8x16) -> i8x16) ,
50
+ "mulhi_epu16" => p ! ( "sse2.pmulhu.w" , ( i8x16, i8x16) -> i8x16) ,
51
+ "packs_epi16" => p ! ( "sse2.packsswb.128" , ( i16x8, i16x8) -> i8x16) ,
52
+ "packs_epi32" => p ! ( "sse2.packssdw.128" , ( i32x4, i32x4) -> i16x8) ,
53
+ "packus_epi16" => p ! ( "sse2.packuswb.128" , ( i16x8, i16x8) -> i8x16) ,
54
+ "sad_epu8" => p ! ( "sse2.psad.bw" , ( i8x16, i8x16) -> i64x2) ,
55
+ "subs_epi16" => p ! ( "sse2.psubs.w" , ( i16x8, i16x8) -> i16x8) ,
56
+ "subs_epi8" => p ! ( "sse2.psubs.b" , ( i8x16, i8x16) -> i8x16) ,
57
+ "subs_epu16" => p ! ( "sse2.psubus.w" , ( i16x8, i16x8) -> i16x8) ,
58
+ "subs_epu8" => p ! ( "sse2.psubus.b" , ( i8x16, i8x16) -> i8x16) ,
59
+
60
+ "shuffle_epi8" => p ! ( "ssse3.pshuf.b.128" , ( i8x16, i8x16) -> i8x16) ,
63
61
_ => return None
64
62
} )
65
63
}
0 commit comments