@@ -18,82 +18,88 @@ macro_rules! p {
18
18
}
19
19
20
20
pub fn find < ' tcx > ( _tcx : & ty:: ctxt < ' tcx > , name : & str ) -> Option < Intrinsic > {
21
- if !name. starts_with ( "mm_" ) { return None }
21
+ if name. starts_with ( "mm_" ) {
22
+ Some ( match & name[ "mm_" . len ( ) ..] {
23
+ "sqrt_ps" => plain ! ( "llvm.sqrt.v4f32" , ( f32x4) -> f32x4) ,
24
+ "sqrt_pd" => plain ! ( "llvm.sqrt.v2f64" , ( f64x2) -> f64x2) ,
22
25
23
- Some ( match & name[ "mm_" . len ( ) ..] {
24
- "sqrt_ps" => plain ! ( "llvm.sqrt.v4f32" , ( f32x4) -> f32x4) ,
25
- "sqrt_pd" => plain ! ( "llvm.sqrt.v2f64" , ( f64x2) -> f64x2) ,
26
+ "movemask_ps" => p ! ( "sse.movmsk.ps" , ( f32x4) -> i32 ) ,
27
+ "max_ps" => p ! ( "sse.max.ps" , ( f32x4, f32x4) -> f32x4) ,
28
+ "min_ps" => p ! ( "sse.min.ps" , ( f32x4, f32x4) -> f32x4) ,
29
+ "rsqrt_ps" => p ! ( "sse.rsqrt.ps" , ( f32x4) -> f32x4) ,
30
+ "rcp_ps" => p ! ( "sse.rcp.ps" , ( f32x4) -> f32x4) ,
26
31
27
- "movemask_ps" => p ! ( "sse.movmsk.ps" , ( f32x4) -> i32 ) ,
28
- "max_ps" => p ! ( "sse.max.ps" , ( f32x4, f32x4) -> f32x4) ,
29
- "min_ps" => p ! ( "sse.min.ps" , ( f32x4, f32x4) -> f32x4) ,
30
- "rsqrt_ps" => p ! ( "sse.rsqrt.ps" , ( f32x4) -> f32x4) ,
31
- "rcp_ps" => p ! ( "sse.rcp.ps" , ( f32x4) -> f32x4) ,
32
+ "adds_epi16" => p ! ( "sse2.padds.w" , ( i16x8, i16x8) -> i16x8) ,
33
+ "adds_epi8" => p ! ( "sse2.padds.b" , ( i8x16, i8x16) -> i8x16) ,
34
+ "adds_epu16" => p ! ( "sse2.paddus.w" , ( i16x8, i16x8) -> i16x8) ,
35
+ "adds_epu8" => p ! ( "sse2.paddus.b" , ( i8x16, i8x16) -> i8x16) ,
36
+ "avg_epu16" => p ! ( "sse2.pavg.w" , ( i16x8, i16x8) -> i16x8) ,
37
+ "avg_epu8" => p ! ( "sse2.pavg.b" , ( i8x16, i8x16) -> i8x16) ,
38
+ "madd_epi16" => p ! ( "sse2.pmadd.wd" , ( i16x8, i16x8) -> i32x4) ,
39
+ "max_epi16" => p ! ( "sse2.pmaxs.w" , ( i16x8, i16x8) -> i16x8) ,
40
+ "max_epu8" => p ! ( "sse2.pmaxu.b" , ( i8x16, i8x16) -> i8x16) ,
41
+ "max_pd" => p ! ( "sse2.max.pd" , ( f64x2, f64x2) -> f64x2) ,
42
+ "min_epi16" => p ! ( "sse2.pmins.w" , ( i16x8, i16x8) -> i16x8) ,
43
+ "min_epu8" => p ! ( "sse2.pminu.b" , ( i8x16, i8x16) -> i8x16) ,
44
+ "min_pd" => p ! ( "sse2.min.pd" , ( f64x2, f64x2) -> f64x2) ,
45
+ "movemask_pd" => p ! ( "sse2.movmsk.pd" , ( f64x2) -> i32 ) ,
46
+ "movemask_epi8" => p ! ( "sse2.pmovmskb.128" , ( i8x16) -> i32 ) ,
47
+ "mul_epu32" => p ! ( "sse2.pmulu.dq" , ( i32x4, i32x4) -> i64x2) ,
48
+ "mulhi_epi16" => p ! ( "sse2.pmulh.w" , ( i8x16, i8x16) -> i8x16) ,
49
+ "mulhi_epu16" => p ! ( "sse2.pmulhu.w" , ( i8x16, i8x16) -> i8x16) ,
50
+ "packs_epi16" => p ! ( "sse2.packsswb.128" , ( i16x8, i16x8) -> i8x16) ,
51
+ "packs_epi32" => p ! ( "sse2.packssdw.128" , ( i32x4, i32x4) -> i16x8) ,
52
+ "packus_epi16" => p ! ( "sse2.packuswb.128" , ( i16x8, i16x8) -> i8x16) ,
53
+ "sad_epu8" => p ! ( "sse2.psad.bw" , ( i8x16, i8x16) -> i64x2) ,
54
+ "subs_epi16" => p ! ( "sse2.psubs.w" , ( i16x8, i16x8) -> i16x8) ,
55
+ "subs_epi8" => p ! ( "sse2.psubs.b" , ( i8x16, i8x16) -> i8x16) ,
56
+ "subs_epu16" => p ! ( "sse2.psubus.w" , ( i16x8, i16x8) -> i16x8) ,
57
+ "subs_epu8" => p ! ( "sse2.psubus.b" , ( i8x16, i8x16) -> i8x16) ,
32
58
33
- "adds_epi16" => p ! ( "sse2.padds.w" , ( i16x8, i16x8) -> i16x8) ,
34
- "adds_epi8" => p ! ( "sse2.padds.b" , ( i8x16, i8x16) -> i8x16) ,
35
- "adds_epu16" => p ! ( "sse2.paddus.w" , ( i16x8, i16x8) -> i16x8) ,
36
- "adds_epu8" => p ! ( "sse2.paddus.b" , ( i8x16, i8x16) -> i8x16) ,
37
- "avg_epu16" => p ! ( "sse2.pavg.w" , ( i16x8, i16x8) -> i16x8) ,
38
- "avg_epu8" => p ! ( "sse2.pavg.b" , ( i8x16, i8x16) -> i8x16) ,
39
- "madd_epi16" => p ! ( "sse2.pmadd.wd" , ( i16x8, i16x8) -> i32x4) ,
40
- "max_epi16" => p ! ( "sse2.pmaxs.w" , ( i16x8, i16x8) -> i16x8) ,
41
- "max_epu8" => p ! ( "sse2.pmaxu.b" , ( i8x16, i8x16) -> i8x16) ,
42
- "max_pd" => p ! ( "sse2.max.pd" , ( f64x2, f64x2) -> f64x2) ,
43
- "min_epi16" => p ! ( "sse2.pmins.w" , ( i16x8, i16x8) -> i16x8) ,
44
- "min_epu8" => p ! ( "sse2.pminu.b" , ( i8x16, i8x16) -> i8x16) ,
45
- "min_pd" => p ! ( "sse2.min.pd" , ( f64x2, f64x2) -> f64x2) ,
46
- "movemask_pd" => p ! ( "sse2.movmsk.pd" , ( f64x2) -> i32 ) ,
47
- "movemask_epi8" => p ! ( "sse2.pmovmskb.128" , ( i8x16) -> i32 ) ,
48
- "mul_epu32" => p ! ( "sse2.pmulu.dq" , ( i32x4, i32x4) -> i64x2) ,
49
- "mulhi_epi16" => p ! ( "sse2.pmulh.w" , ( i8x16, i8x16) -> i8x16) ,
50
- "mulhi_epu16" => p ! ( "sse2.pmulhu.w" , ( i8x16, i8x16) -> i8x16) ,
51
- "packs_epi16" => p ! ( "sse2.packsswb.128" , ( i16x8, i16x8) -> i8x16) ,
52
- "packs_epi32" => p ! ( "sse2.packssdw.128" , ( i32x4, i32x4) -> i16x8) ,
53
- "packus_epi16" => p ! ( "sse2.packuswb.128" , ( i16x8, i16x8) -> i8x16) ,
54
- "sad_epu8" => p ! ( "sse2.psad.bw" , ( i8x16, i8x16) -> i64x2) ,
55
- "subs_epi16" => p ! ( "sse2.psubs.w" , ( i16x8, i16x8) -> i16x8) ,
56
- "subs_epi8" => p ! ( "sse2.psubs.b" , ( i8x16, i8x16) -> i8x16) ,
57
- "subs_epu16" => p ! ( "sse2.psubus.w" , ( i16x8, i16x8) -> i16x8) ,
58
- "subs_epu8" => p ! ( "sse2.psubus.b" , ( i8x16, i8x16) -> i8x16) ,
59
+ "addsub_pd" => p ! ( "sse3.addsub.pd" , ( f64x2, f64x2) -> f64x2) ,
60
+ "addsub_ps" => p ! ( "sse3.addsub.ps" , ( f32x4, f32x4) -> f32x4) ,
61
+ "hadd_pd" => p ! ( "sse3.hadd.pd" , ( f64x2, f64x2) -> f64x2) ,
62
+ "hadd_ps" => p ! ( "sse3.hadd.ps" , ( f32x4, f32x4) -> f32x4) ,
63
+ "hsub_pd" => p ! ( "sse3.hsub.pd" , ( f64x2, f64x2) -> f64x2) ,
64
+ "hsub_ps" => p ! ( "sse3.hsub.ps" , ( f32x4, f32x4) -> f32x4) ,
59
65
60
- "addsub_pd" => p ! ( "sse3.addsub.pd" , ( f64x2, f64x2) -> f64x2) ,
61
- "addsub_ps" => p ! ( "sse3.addsub.ps" , ( f32x4, f32x4) -> f32x4) ,
62
- "hadd_pd" => p ! ( "sse3.hadd.pd" , ( f64x2, f64x2) -> f64x2) ,
63
- "hadd_ps" => p ! ( "sse3.hadd.ps" , ( f32x4, f32x4) -> f32x4) ,
64
- "hsub_pd" => p ! ( "sse3.hsub.pd" , ( f64x2, f64x2) -> f64x2) ,
65
- "hsub_ps" => p ! ( "sse3.hsub.ps" , ( f32x4, f32x4) -> f32x4) ,
66
+ "abs_epi16" => p ! ( "ssse3.pabs.w.128" , ( i16x8) -> i16x8) ,
67
+ "abs_epi32" => p ! ( "ssse3.pabs.d.128" , ( i32x4) -> i32x4) ,
68
+ "abs_epi8" => p ! ( "ssse3.pabs.b.128" , ( i8x16) -> i8x16) ,
69
+ "hadd_epi16" => p ! ( "ssse3.phadd.w.128" , ( i16x8, i16x8) -> i16x8) ,
70
+ "hadd_epi32" => p ! ( "ssse3.phadd.d.128" , ( i32x4, i32x4) -> i32x4) ,
71
+ "hadds_epi16" => p ! ( "ssse3.phadd.sw.128" , ( i16x8, i16x8) -> i16x8) ,
72
+ "hsub_epi16" => p ! ( "ssse3.phsub.w.128" , ( i16x8, i16x8) -> i16x8) ,
73
+ "hsub_epi32" => p ! ( "ssse3.phsub.d.128" , ( i32x4, i32x4) -> i32x4) ,
74
+ "hsubs_epi16" => p ! ( "ssse3.phsub.sw.128" , ( i16x8, i16x8) -> i16x8) ,
75
+ "maddubs_epi16" => p ! ( "ssse3.pmadd.ub.sw.128" , ( i8x16, i8x16) -> i16x8) ,
76
+ "mulhrs_epi16" => p ! ( "ssse3.pmul.hr.sw.128" , ( i16x8, i16x8) -> i16x8) ,
77
+ "shuffle_epi8" => p ! ( "ssse3.pshuf.b.128" , ( i8x16, i8x16) -> i8x16) ,
78
+ "sign_epi16" => p ! ( "ssse3.psign.w.128" , ( i16x8, i16x8) -> i16x8) ,
79
+ "sign_epi32" => p ! ( "ssse3.psign.d.128" , ( i32x4, i32x4) -> i32x4) ,
80
+ "sign_epi8" => p ! ( "ssse3.psign.b.128" , ( i8x16, i8x16) -> i8x16) ,
66
81
67
- "abs_epi16" => p ! ( "ssse3.pabs.w.128" , ( i16x8) -> i16x8) ,
68
- "abs_epi32" => p ! ( "ssse3.pabs.d.128" , ( i32x4) -> i32x4) ,
69
- "abs_epi8" => p ! ( "ssse3.pabs.b.128" , ( i8x16) -> i8x16) ,
70
- "hadd_epi16" => p ! ( "ssse3.phadd.w.128" , ( i16x8, i16x8) -> i16x8) ,
71
- "hadd_epi32" => p ! ( "ssse3.phadd.d.128" , ( i32x4, i32x4) -> i32x4) ,
72
- "hadds_epi16" => p ! ( "ssse3.phadd.sw.128" , ( i16x8, i16x8) -> i16x8) ,
73
- "hsub_epi16" => p ! ( "ssse3.phsub.w.128" , ( i16x8, i16x8) -> i16x8) ,
74
- "hsub_epi32" => p ! ( "ssse3.phsub.d.128" , ( i32x4, i32x4) -> i32x4) ,
75
- "hsubs_epi16" => p ! ( "ssse3.phsub.sw.128" , ( i16x8, i16x8) -> i16x8) ,
76
- "maddubs_epi16" => p ! ( "ssse3.pmadd.ub.sw.128" , ( i8x16, i8x16) -> i16x8) ,
77
- "mulhrs_epi16" => p ! ( "ssse3.pmul.hr.sw.128" , ( i16x8, i16x8) -> i16x8) ,
78
- "shuffle_epi8" => p ! ( "ssse3.pshuf.b.128" , ( i8x16, i8x16) -> i8x16) ,
79
- "sign_epi16" => p ! ( "ssse3.psign.w.128" , ( i16x8, i16x8) -> i16x8) ,
80
- "sign_epi32" => p ! ( "ssse3.psign.d.128" , ( i32x4, i32x4) -> i32x4) ,
81
- "sign_epi8" => p ! ( "ssse3.psign.b.128" , ( i8x16, i8x16) -> i8x16) ,
82
-
83
- "max_epi32" => p ! ( "sse41.pmaxsd" , ( i32x4, i32x4) -> i32x4) ,
84
- "max_epi8" => p ! ( "sse41.pmaxsb" , ( i8x16, i8x16) -> i8x16) ,
85
- "max_epu16" => p ! ( "sse41.pmaxuw" , ( i16x8, i16x8) -> i16x8) ,
86
- "max_epu32" => p ! ( "sse41.pmaxud" , ( i32x4, i32x4) -> i32x4) ,
87
- "min_epi32" => p ! ( "sse41.pminsd" , ( i32x4, i32x4) -> i32x4) ,
88
- "min_epi8" => p ! ( "sse41.pminsb" , ( i8x16, i8x16) -> i8x16) ,
89
- "min_epu16" => p ! ( "sse41.pminuw" , ( i16x8, i16x8) -> i16x8) ,
90
- "min_epu32" => p ! ( "sse41.pminud" , ( i32x4, i32x4) -> i32x4) ,
91
- "minpos_epu16" => p ! ( "sse41.phminposuw" , ( i16x8) -> i16x8) ,
92
- "mul_epi32" => p ! ( "sse41.muldq" , ( i32x4, i32x4) -> i64x2) ,
93
- "packus_epi32" => p ! ( "sse41.packusdw" , ( i32x4, i32x4) -> i16x8) ,
94
- "testc_si128" => p ! ( "sse41.ptestc" , ( i64x2, i64x2) -> i32 ) ,
95
- "testnzc_si128" => p ! ( "sse41.ptestnzc" , ( i64x2, i64x2) -> i32 ) ,
96
- "testz_si128" => p ! ( "sse41.ptestz" , ( i64x2, i64x2) -> i32 ) ,
97
- _ => return None
98
- } )
82
+ "max_epi32" => p ! ( "sse41.pmaxsd" , ( i32x4, i32x4) -> i32x4) ,
83
+ "max_epi8" => p ! ( "sse41.pmaxsb" , ( i8x16, i8x16) -> i8x16) ,
84
+ "max_epu16" => p ! ( "sse41.pmaxuw" , ( i16x8, i16x8) -> i16x8) ,
85
+ "max_epu32" => p ! ( "sse41.pmaxud" , ( i32x4, i32x4) -> i32x4) ,
86
+ "min_epi32" => p ! ( "sse41.pminsd" , ( i32x4, i32x4) -> i32x4) ,
87
+ "min_epi8" => p ! ( "sse41.pminsb" , ( i8x16, i8x16) -> i8x16) ,
88
+ "min_epu16" => p ! ( "sse41.pminuw" , ( i16x8, i16x8) -> i16x8) ,
89
+ "min_epu32" => p ! ( "sse41.pminud" , ( i32x4, i32x4) -> i32x4) ,
90
+ "minpos_epu16" => p ! ( "sse41.phminposuw" , ( i16x8) -> i16x8) ,
91
+ "mul_epi32" => p ! ( "sse41.muldq" , ( i32x4, i32x4) -> i64x2) ,
92
+ "packus_epi32" => p ! ( "sse41.packusdw" , ( i32x4, i32x4) -> i16x8) ,
93
+ "testc_si128" => p ! ( "sse41.ptestc" , ( i64x2, i64x2) -> i32 ) ,
94
+ "testnzc_si128" => p ! ( "sse41.ptestnzc" , ( i64x2, i64x2) -> i32 ) ,
95
+ "testz_si128" => p ! ( "sse41.ptestz" , ( i64x2, i64x2) -> i32 ) ,
96
+ _ => return None
97
+ } )
98
+ } else if name. starts_with ( "mm256_" ) {
99
+ Some ( match & name[ "mm256_" . len ( ) ..] {
100
+ _ => return None ,
101
+ } )
102
+ } else {
103
+ None
104
+ }
99
105
}
0 commit comments