Skip to content

Commit 8522ae0

Browse files
committed
---
yaml --- r: 232476 b: refs/heads/try c: 67d56db h: refs/heads/master v: v3
1 parent 22ceb24 commit 8522ae0

File tree

3 files changed

+88
-74
lines changed

3 files changed

+88
-74
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
---
22
refs/heads/master: edeb4f1c86cbf6af8ef9874d4b3af50f721ea1b8
33
refs/heads/snap-stage3: 1af31d4974e33027a68126fa5a5a3c2c6491824f
4-
refs/heads/try: 627784b186e49648f63af0dc24cd912a7b53d56d
4+
refs/heads/try: 67d56db16fe1e8190d704572de41941e568ed568
55
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
66
refs/tags/release-0.2: c870d2dffb391e14efb05aa27898f1f6333a9596
77
refs/tags/release-0.3: b5f0d0f648d9a6153664837026ba1be43d3e2503

branches/try/src/librustc_platform_intrinsics/lib.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ fn f(width: u8) -> Type { Type::Float(width) }
4545
fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), length) }
4646

4747
macro_rules! ty {
48+
(f32x8) => (v(f(32), 8));
49+
(f64x4) => (v(f(64), 4));
50+
51+
(i8x32) => (v(i(8), 32));
52+
(i16x16) => (v(i(16), 16));
53+
(i32x8) => (v(i(32), 8));
54+
(i64x4) => (v(i(64), 4));
55+
4856
(f32x4) => (v(f(32), 4));
4957
(f64x2) => (v(f(64), 2));
5058

branches/try/src/librustc_platform_intrinsics/x86.rs

Lines changed: 79 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -18,82 +18,88 @@ macro_rules! p {
1818
}
1919

2020
pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
21-
if !name.starts_with("mm_") { return None }
21+
if name.starts_with("mm_") {
22+
Some(match &name["mm_".len()..] {
23+
"sqrt_ps" => plain!("llvm.sqrt.v4f32", (f32x4) -> f32x4),
24+
"sqrt_pd" => plain!("llvm.sqrt.v2f64", (f64x2) -> f64x2),
2225

23-
Some(match &name["mm_".len()..] {
24-
"sqrt_ps" => plain!("llvm.sqrt.v4f32", (f32x4) -> f32x4),
25-
"sqrt_pd" => plain!("llvm.sqrt.v2f64", (f64x2) -> f64x2),
26+
"movemask_ps" => p!("sse.movmsk.ps", (f32x4) -> i32),
27+
"max_ps" => p!("sse.max.ps", (f32x4, f32x4) -> f32x4),
28+
"min_ps" => p!("sse.min.ps", (f32x4, f32x4) -> f32x4),
29+
"rsqrt_ps" => p!("sse.rsqrt.ps", (f32x4) -> f32x4),
30+
"rcp_ps" => p!("sse.rcp.ps", (f32x4) -> f32x4),
2631

27-
"movemask_ps" => p!("sse.movmsk.ps", (f32x4) -> i32),
28-
"max_ps" => p!("sse.max.ps", (f32x4, f32x4) -> f32x4),
29-
"min_ps" => p!("sse.min.ps", (f32x4, f32x4) -> f32x4),
30-
"rsqrt_ps" => p!("sse.rsqrt.ps", (f32x4) -> f32x4),
31-
"rcp_ps" => p!("sse.rcp.ps", (f32x4) -> f32x4),
32+
"adds_epi16" => p!("sse2.padds.w", (i16x8, i16x8) -> i16x8),
33+
"adds_epi8" => p!("sse2.padds.b", (i8x16, i8x16) -> i8x16),
34+
"adds_epu16" => p!("sse2.paddus.w", (i16x8, i16x8) -> i16x8),
35+
"adds_epu8" => p!("sse2.paddus.b", (i8x16, i8x16) -> i8x16),
36+
"avg_epu16" => p!("sse2.pavg.w", (i16x8, i16x8) -> i16x8),
37+
"avg_epu8" => p!("sse2.pavg.b", (i8x16, i8x16) -> i8x16),
38+
"madd_epi16" => p!("sse2.pmadd.wd", (i16x8, i16x8) -> i32x4),
39+
"max_epi16" => p!("sse2.pmaxs.w", (i16x8, i16x8) -> i16x8),
40+
"max_epu8" => p!("sse2.pmaxu.b", (i8x16, i8x16) -> i8x16),
41+
"max_pd" => p!("sse2.max.pd", (f64x2, f64x2) -> f64x2),
42+
"min_epi16" => p!("sse2.pmins.w", (i16x8, i16x8) -> i16x8),
43+
"min_epu8" => p!("sse2.pminu.b", (i8x16, i8x16) -> i8x16),
44+
"min_pd" => p!("sse2.min.pd", (f64x2, f64x2) -> f64x2),
45+
"movemask_pd" => p!("sse2.movmsk.pd", (f64x2) -> i32),
46+
"movemask_epi8" => p!("sse2.pmovmskb.128", (i8x16) -> i32),
47+
"mul_epu32" => p!("sse2.pmulu.dq", (i32x4, i32x4) -> i64x2),
48+
"mulhi_epi16" => p!("sse2.pmulh.w", (i8x16, i8x16) -> i8x16),
49+
"mulhi_epu16" => p!("sse2.pmulhu.w", (i8x16, i8x16) -> i8x16),
50+
"packs_epi16" => p!("sse2.packsswb.128", (i16x8, i16x8) -> i8x16),
51+
"packs_epi32" => p!("sse2.packssdw.128", (i32x4, i32x4) -> i16x8),
52+
"packus_epi16" => p!("sse2.packuswb.128", (i16x8, i16x8) -> i8x16),
53+
"sad_epu8" => p!("sse2.psad.bw", (i8x16, i8x16) -> i64x2),
54+
"subs_epi16" => p!("sse2.psubs.w", (i16x8, i16x8) -> i16x8),
55+
"subs_epi8" => p!("sse2.psubs.b", (i8x16, i8x16) -> i8x16),
56+
"subs_epu16" => p!("sse2.psubus.w", (i16x8, i16x8) -> i16x8),
57+
"subs_epu8" => p!("sse2.psubus.b", (i8x16, i8x16) -> i8x16),
3258

33-
"adds_epi16" => p!("sse2.padds.w", (i16x8, i16x8) -> i16x8),
34-
"adds_epi8" => p!("sse2.padds.b", (i8x16, i8x16) -> i8x16),
35-
"adds_epu16" => p!("sse2.paddus.w", (i16x8, i16x8) -> i16x8),
36-
"adds_epu8" => p!("sse2.paddus.b", (i8x16, i8x16) -> i8x16),
37-
"avg_epu16" => p!("sse2.pavg.w", (i16x8, i16x8) -> i16x8),
38-
"avg_epu8" => p!("sse2.pavg.b", (i8x16, i8x16) -> i8x16),
39-
"madd_epi16" => p!("sse2.pmadd.wd", (i16x8, i16x8) -> i32x4),
40-
"max_epi16" => p!("sse2.pmaxs.w", (i16x8, i16x8) -> i16x8),
41-
"max_epu8" => p!("sse2.pmaxu.b", (i8x16, i8x16) -> i8x16),
42-
"max_pd" => p!("sse2.max.pd", (f64x2, f64x2) -> f64x2),
43-
"min_epi16" => p!("sse2.pmins.w", (i16x8, i16x8) -> i16x8),
44-
"min_epu8" => p!("sse2.pminu.b", (i8x16, i8x16) -> i8x16),
45-
"min_pd" => p!("sse2.min.pd", (f64x2, f64x2) -> f64x2),
46-
"movemask_pd" => p!("sse2.movmsk.pd", (f64x2) -> i32),
47-
"movemask_epi8" => p!("sse2.pmovmskb.128", (i8x16) -> i32),
48-
"mul_epu32" => p!("sse2.pmulu.dq", (i32x4, i32x4) -> i64x2),
49-
"mulhi_epi16" => p!("sse2.pmulh.w", (i8x16, i8x16) -> i8x16),
50-
"mulhi_epu16" => p!("sse2.pmulhu.w", (i8x16, i8x16) -> i8x16),
51-
"packs_epi16" => p!("sse2.packsswb.128", (i16x8, i16x8) -> i8x16),
52-
"packs_epi32" => p!("sse2.packssdw.128", (i32x4, i32x4) -> i16x8),
53-
"packus_epi16" => p!("sse2.packuswb.128", (i16x8, i16x8) -> i8x16),
54-
"sad_epu8" => p!("sse2.psad.bw", (i8x16, i8x16) -> i64x2),
55-
"subs_epi16" => p!("sse2.psubs.w", (i16x8, i16x8) -> i16x8),
56-
"subs_epi8" => p!("sse2.psubs.b", (i8x16, i8x16) -> i8x16),
57-
"subs_epu16" => p!("sse2.psubus.w", (i16x8, i16x8) -> i16x8),
58-
"subs_epu8" => p!("sse2.psubus.b", (i8x16, i8x16) -> i8x16),
59+
"addsub_pd" => p!("sse3.addsub.pd", (f64x2, f64x2) -> f64x2),
60+
"addsub_ps" => p!("sse3.addsub.ps", (f32x4, f32x4) -> f32x4),
61+
"hadd_pd" => p!("sse3.hadd.pd", (f64x2, f64x2) -> f64x2),
62+
"hadd_ps" => p!("sse3.hadd.ps", (f32x4, f32x4) -> f32x4),
63+
"hsub_pd" => p!("sse3.hsub.pd", (f64x2, f64x2) -> f64x2),
64+
"hsub_ps" => p!("sse3.hsub.ps", (f32x4, f32x4) -> f32x4),
5965

60-
"addsub_pd" => p!("sse3.addsub.pd", (f64x2, f64x2) -> f64x2),
61-
"addsub_ps" => p!("sse3.addsub.ps", (f32x4, f32x4) -> f32x4),
62-
"hadd_pd" => p!("sse3.hadd.pd", (f64x2, f64x2) -> f64x2),
63-
"hadd_ps" => p!("sse3.hadd.ps", (f32x4, f32x4) -> f32x4),
64-
"hsub_pd" => p!("sse3.hsub.pd", (f64x2, f64x2) -> f64x2),
65-
"hsub_ps" => p!("sse3.hsub.ps", (f32x4, f32x4) -> f32x4),
66+
"abs_epi16" => p!("ssse3.pabs.w.128", (i16x8) -> i16x8),
67+
"abs_epi32" => p!("ssse3.pabs.d.128", (i32x4) -> i32x4),
68+
"abs_epi8" => p!("ssse3.pabs.b.128", (i8x16) -> i8x16),
69+
"hadd_epi16" => p!("ssse3.phadd.w.128", (i16x8, i16x8) -> i16x8),
70+
"hadd_epi32" => p!("ssse3.phadd.d.128", (i32x4, i32x4) -> i32x4),
71+
"hadds_epi16" => p!("ssse3.phadd.sw.128", (i16x8, i16x8) -> i16x8),
72+
"hsub_epi16" => p!("ssse3.phsub.w.128", (i16x8, i16x8) -> i16x8),
73+
"hsub_epi32" => p!("ssse3.phsub.d.128", (i32x4, i32x4) -> i32x4),
74+
"hsubs_epi16" => p!("ssse3.phsub.sw.128", (i16x8, i16x8) -> i16x8),
75+
"maddubs_epi16" => p!("ssse3.pmadd.ub.sw.128", (i8x16, i8x16) -> i16x8),
76+
"mulhrs_epi16" => p!("ssse3.pmul.hr.sw.128", (i16x8, i16x8) -> i16x8),
77+
"shuffle_epi8" => p!("ssse3.pshuf.b.128", (i8x16, i8x16) -> i8x16),
78+
"sign_epi16" => p!("ssse3.psign.w.128", (i16x8, i16x8) -> i16x8),
79+
"sign_epi32" => p!("ssse3.psign.d.128", (i32x4, i32x4) -> i32x4),
80+
"sign_epi8" => p!("ssse3.psign.b.128", (i8x16, i8x16) -> i8x16),
6681

67-
"abs_epi16" => p!("ssse3.pabs.w.128", (i16x8) -> i16x8),
68-
"abs_epi32" => p!("ssse3.pabs.d.128", (i32x4) -> i32x4),
69-
"abs_epi8" => p!("ssse3.pabs.b.128", (i8x16) -> i8x16),
70-
"hadd_epi16" => p!("ssse3.phadd.w.128", (i16x8, i16x8) -> i16x8),
71-
"hadd_epi32" => p!("ssse3.phadd.d.128", (i32x4, i32x4) -> i32x4),
72-
"hadds_epi16" => p!("ssse3.phadd.sw.128", (i16x8, i16x8) -> i16x8),
73-
"hsub_epi16" => p!("ssse3.phsub.w.128", (i16x8, i16x8) -> i16x8),
74-
"hsub_epi32" => p!("ssse3.phsub.d.128", (i32x4, i32x4) -> i32x4),
75-
"hsubs_epi16" => p!("ssse3.phsub.sw.128", (i16x8, i16x8) -> i16x8),
76-
"maddubs_epi16" => p!("ssse3.pmadd.ub.sw.128", (i8x16, i8x16) -> i16x8),
77-
"mulhrs_epi16" => p!("ssse3.pmul.hr.sw.128", (i16x8, i16x8) -> i16x8),
78-
"shuffle_epi8" => p!("ssse3.pshuf.b.128", (i8x16, i8x16) -> i8x16),
79-
"sign_epi16" => p!("ssse3.psign.w.128", (i16x8, i16x8) -> i16x8),
80-
"sign_epi32" => p!("ssse3.psign.d.128", (i32x4, i32x4) -> i32x4),
81-
"sign_epi8" => p!("ssse3.psign.b.128", (i8x16, i8x16) -> i8x16),
82-
83-
"max_epi32" => p!("sse41.pmaxsd", (i32x4, i32x4) -> i32x4),
84-
"max_epi8" => p!("sse41.pmaxsb", (i8x16, i8x16) -> i8x16),
85-
"max_epu16" => p!("sse41.pmaxuw", (i16x8, i16x8) -> i16x8),
86-
"max_epu32" => p!("sse41.pmaxud", (i32x4, i32x4) -> i32x4),
87-
"min_epi32" => p!("sse41.pminsd", (i32x4, i32x4) -> i32x4),
88-
"min_epi8" => p!("sse41.pminsb", (i8x16, i8x16) -> i8x16),
89-
"min_epu16" => p!("sse41.pminuw", (i16x8, i16x8) -> i16x8),
90-
"min_epu32" => p!("sse41.pminud", (i32x4, i32x4) -> i32x4),
91-
"minpos_epu16" => p!("sse41.phminposuw", (i16x8) -> i16x8),
92-
"mul_epi32" => p!("sse41.muldq", (i32x4, i32x4) -> i64x2),
93-
"packus_epi32" => p!("sse41.packusdw", (i32x4, i32x4) -> i16x8),
94-
"testc_si128" => p!("sse41.ptestc", (i64x2, i64x2) -> i32),
95-
"testnzc_si128" => p!("sse41.ptestnzc", (i64x2, i64x2) -> i32),
96-
"testz_si128" => p!("sse41.ptestz", (i64x2, i64x2) -> i32),
97-
_ => return None
98-
})
82+
"max_epi32" => p!("sse41.pmaxsd", (i32x4, i32x4) -> i32x4),
83+
"max_epi8" => p!("sse41.pmaxsb", (i8x16, i8x16) -> i8x16),
84+
"max_epu16" => p!("sse41.pmaxuw", (i16x8, i16x8) -> i16x8),
85+
"max_epu32" => p!("sse41.pmaxud", (i32x4, i32x4) -> i32x4),
86+
"min_epi32" => p!("sse41.pminsd", (i32x4, i32x4) -> i32x4),
87+
"min_epi8" => p!("sse41.pminsb", (i8x16, i8x16) -> i8x16),
88+
"min_epu16" => p!("sse41.pminuw", (i16x8, i16x8) -> i16x8),
89+
"min_epu32" => p!("sse41.pminud", (i32x4, i32x4) -> i32x4),
90+
"minpos_epu16" => p!("sse41.phminposuw", (i16x8) -> i16x8),
91+
"mul_epi32" => p!("sse41.muldq", (i32x4, i32x4) -> i64x2),
92+
"packus_epi32" => p!("sse41.packusdw", (i32x4, i32x4) -> i16x8),
93+
"testc_si128" => p!("sse41.ptestc", (i64x2, i64x2) -> i32),
94+
"testnzc_si128" => p!("sse41.ptestnzc", (i64x2, i64x2) -> i32),
95+
"testz_si128" => p!("sse41.ptestz", (i64x2, i64x2) -> i32),
96+
_ => return None
97+
})
98+
} else if name.starts_with("mm256_") {
99+
Some(match &name["mm256_".len()..] {
100+
_ => return None,
101+
})
102+
} else {
103+
None
104+
}
99105
}

0 commit comments

Comments
 (0)