Skip to content

Commit 128cb1c

Browse files
authored
Use simd intrinsics for max and min (#1357)
1 parent 83cd38d commit 128cb1c

File tree

3 files changed

+72
-72
lines changed

3 files changed

+72
-72
lines changed

library/stdarch/crates/core_arch/src/x86/avx2.rs

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1857,7 +1857,9 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25
18571857
#[cfg_attr(test, assert_instr(vpmaxsw))]
18581858
#[stable(feature = "simd_x86", since = "1.27.0")]
18591859
pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
1860-
transmute(pmaxsw(a.as_i16x16(), b.as_i16x16()))
1860+
let a = a.as_i16x16();
1861+
let b = b.as_i16x16();
1862+
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
18611863
}
18621864

18631865
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -1869,7 +1871,9 @@ pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
18691871
#[cfg_attr(test, assert_instr(vpmaxsd))]
18701872
#[stable(feature = "simd_x86", since = "1.27.0")]
18711873
pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
1872-
transmute(pmaxsd(a.as_i32x8(), b.as_i32x8()))
1874+
let a = a.as_i32x8();
1875+
let b = b.as_i32x8();
1876+
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
18731877
}
18741878

18751879
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -1881,7 +1885,9 @@ pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
18811885
#[cfg_attr(test, assert_instr(vpmaxsb))]
18821886
#[stable(feature = "simd_x86", since = "1.27.0")]
18831887
pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
1884-
transmute(pmaxsb(a.as_i8x32(), b.as_i8x32()))
1888+
let a = a.as_i8x32();
1889+
let b = b.as_i8x32();
1890+
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
18851891
}
18861892

18871893
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -1893,7 +1899,9 @@ pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
18931899
#[cfg_attr(test, assert_instr(vpmaxuw))]
18941900
#[stable(feature = "simd_x86", since = "1.27.0")]
18951901
pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
1896-
transmute(pmaxuw(a.as_u16x16(), b.as_u16x16()))
1902+
let a = a.as_u16x16();
1903+
let b = b.as_u16x16();
1904+
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
18971905
}
18981906

18991907
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -1905,7 +1913,9 @@ pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
19051913
#[cfg_attr(test, assert_instr(vpmaxud))]
19061914
#[stable(feature = "simd_x86", since = "1.27.0")]
19071915
pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
1908-
transmute(pmaxud(a.as_u32x8(), b.as_u32x8()))
1916+
let a = a.as_u32x8();
1917+
let b = b.as_u32x8();
1918+
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
19091919
}
19101920

19111921
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -1917,7 +1927,9 @@ pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
19171927
#[cfg_attr(test, assert_instr(vpmaxub))]
19181928
#[stable(feature = "simd_x86", since = "1.27.0")]
19191929
pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
1920-
transmute(pmaxub(a.as_u8x32(), b.as_u8x32()))
1930+
let a = a.as_u8x32();
1931+
let b = b.as_u8x32();
1932+
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
19211933
}
19221934

19231935
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -1929,7 +1941,9 @@ pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
19291941
#[cfg_attr(test, assert_instr(vpminsw))]
19301942
#[stable(feature = "simd_x86", since = "1.27.0")]
19311943
pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
1932-
transmute(pminsw(a.as_i16x16(), b.as_i16x16()))
1944+
let a = a.as_i16x16();
1945+
let b = b.as_i16x16();
1946+
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
19331947
}
19341948

19351949
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -1941,7 +1955,9 @@ pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
19411955
#[cfg_attr(test, assert_instr(vpminsd))]
19421956
#[stable(feature = "simd_x86", since = "1.27.0")]
19431957
pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
1944-
transmute(pminsd(a.as_i32x8(), b.as_i32x8()))
1958+
let a = a.as_i32x8();
1959+
let b = b.as_i32x8();
1960+
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
19451961
}
19461962

19471963
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -1953,7 +1969,9 @@ pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
19531969
#[cfg_attr(test, assert_instr(vpminsb))]
19541970
#[stable(feature = "simd_x86", since = "1.27.0")]
19551971
pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
1956-
transmute(pminsb(a.as_i8x32(), b.as_i8x32()))
1972+
let a = a.as_i8x32();
1973+
let b = b.as_i8x32();
1974+
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
19571975
}
19581976

19591977
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -1965,7 +1983,9 @@ pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
19651983
#[cfg_attr(test, assert_instr(vpminuw))]
19661984
#[stable(feature = "simd_x86", since = "1.27.0")]
19671985
pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
1968-
transmute(pminuw(a.as_u16x16(), b.as_u16x16()))
1986+
let a = a.as_u16x16();
1987+
let b = b.as_u16x16();
1988+
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
19691989
}
19701990

19711991
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -1977,7 +1997,9 @@ pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
19771997
#[cfg_attr(test, assert_instr(vpminud))]
19781998
#[stable(feature = "simd_x86", since = "1.27.0")]
19791999
pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
1980-
transmute(pminud(a.as_u32x8(), b.as_u32x8()))
2000+
let a = a.as_u32x8();
2001+
let b = b.as_u32x8();
2002+
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
19812003
}
19822004

19832005
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -1989,7 +2011,9 @@ pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
19892011
#[cfg_attr(test, assert_instr(vpminub))]
19902012
#[stable(feature = "simd_x86", since = "1.27.0")]
19912013
pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
1992-
transmute(pminub(a.as_u8x32(), b.as_u8x32()))
2014+
let a = a.as_u8x32();
2015+
let b = b.as_u8x32();
2016+
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
19932017
}
19942018

19952019
/// Creates mask from the most significant bit of each 8-bit element in `a`,
@@ -3620,30 +3644,6 @@ extern "C" {
36203644
fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
36213645
#[link_name = "llvm.x86.avx2.maskstore.q.256"]
36223646
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
3623-
#[link_name = "llvm.x86.avx2.pmaxs.w"]
3624-
fn pmaxsw(a: i16x16, b: i16x16) -> i16x16;
3625-
#[link_name = "llvm.x86.avx2.pmaxs.d"]
3626-
fn pmaxsd(a: i32x8, b: i32x8) -> i32x8;
3627-
#[link_name = "llvm.x86.avx2.pmaxs.b"]
3628-
fn pmaxsb(a: i8x32, b: i8x32) -> i8x32;
3629-
#[link_name = "llvm.x86.avx2.pmaxu.w"]
3630-
fn pmaxuw(a: u16x16, b: u16x16) -> u16x16;
3631-
#[link_name = "llvm.x86.avx2.pmaxu.d"]
3632-
fn pmaxud(a: u32x8, b: u32x8) -> u32x8;
3633-
#[link_name = "llvm.x86.avx2.pmaxu.b"]
3634-
fn pmaxub(a: u8x32, b: u8x32) -> u8x32;
3635-
#[link_name = "llvm.x86.avx2.pmins.w"]
3636-
fn pminsw(a: i16x16, b: i16x16) -> i16x16;
3637-
#[link_name = "llvm.x86.avx2.pmins.d"]
3638-
fn pminsd(a: i32x8, b: i32x8) -> i32x8;
3639-
#[link_name = "llvm.x86.avx2.pmins.b"]
3640-
fn pminsb(a: i8x32, b: i8x32) -> i8x32;
3641-
#[link_name = "llvm.x86.avx2.pminu.w"]
3642-
fn pminuw(a: u16x16, b: u16x16) -> u16x16;
3643-
#[link_name = "llvm.x86.avx2.pminu.d"]
3644-
fn pminud(a: u32x8, b: u32x8) -> u32x8;
3645-
#[link_name = "llvm.x86.avx2.pminu.b"]
3646-
fn pminub(a: u8x32, b: u8x32) -> u8x32;
36473647
#[link_name = "llvm.x86.avx2.mpsadbw"]
36483648
fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
36493649
#[link_name = "llvm.x86.avx2.pmulhu.w"]

library/stdarch/crates/core_arch/src/x86/sse2.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,9 @@ pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
203203
#[cfg_attr(test, assert_instr(pmaxsw))]
204204
#[stable(feature = "simd_x86", since = "1.27.0")]
205205
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
206-
transmute(pmaxsw(a.as_i16x8(), b.as_i16x8()))
206+
let a = a.as_i16x8();
207+
let b = b.as_i16x8();
208+
transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
207209
}
208210

209211
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
@@ -215,7 +217,9 @@ pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
215217
#[cfg_attr(test, assert_instr(pmaxub))]
216218
#[stable(feature = "simd_x86", since = "1.27.0")]
217219
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
218-
transmute(pmaxub(a.as_u8x16(), b.as_u8x16()))
220+
let a = a.as_u8x16();
221+
let b = b.as_u8x16();
222+
transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
219223
}
220224

221225
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -227,7 +231,9 @@ pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
227231
#[cfg_attr(test, assert_instr(pminsw))]
228232
#[stable(feature = "simd_x86", since = "1.27.0")]
229233
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
230-
transmute(pminsw(a.as_i16x8(), b.as_i16x8()))
234+
let a = a.as_i16x8();
235+
let b = b.as_i16x8();
236+
transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
231237
}
232238

233239
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
@@ -239,7 +245,9 @@ pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
239245
#[cfg_attr(test, assert_instr(pminub))]
240246
#[stable(feature = "simd_x86", since = "1.27.0")]
241247
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
242-
transmute(pminub(a.as_u8x16(), b.as_u8x16()))
248+
let a = a.as_u8x16();
249+
let b = b.as_u8x16();
250+
transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
243251
}
244252

245253
/// Multiplies the packed 16-bit integers in `a` and `b`.
@@ -2798,14 +2806,6 @@ extern "C" {
27982806
fn pavgw(a: u16x8, b: u16x8) -> u16x8;
27992807
#[link_name = "llvm.x86.sse2.pmadd.wd"]
28002808
fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
2801-
#[link_name = "llvm.x86.sse2.pmaxs.w"]
2802-
fn pmaxsw(a: i16x8, b: i16x8) -> i16x8;
2803-
#[link_name = "llvm.x86.sse2.pmaxu.b"]
2804-
fn pmaxub(a: u8x16, b: u8x16) -> u8x16;
2805-
#[link_name = "llvm.x86.sse2.pmins.w"]
2806-
fn pminsw(a: i16x8, b: i16x8) -> i16x8;
2807-
#[link_name = "llvm.x86.sse2.pminu.b"]
2808-
fn pminub(a: u8x16, b: u8x16) -> u8x16;
28092809
#[link_name = "llvm.x86.sse2.pmulh.w"]
28102810
fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
28112811
#[link_name = "llvm.x86.sse2.pmulhu.w"]

library/stdarch/crates/core_arch/src/x86/sse41.rs

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,9 @@ pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
281281
#[cfg_attr(test, assert_instr(pmaxsb))]
282282
#[stable(feature = "simd_x86", since = "1.27.0")]
283283
pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
284-
transmute(pmaxsb(a.as_i8x16(), b.as_i8x16()))
284+
let a = a.as_i8x16();
285+
let b = b.as_i8x16();
286+
transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
285287
}
286288

287289
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
@@ -293,7 +295,9 @@ pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
293295
#[cfg_attr(test, assert_instr(pmaxuw))]
294296
#[stable(feature = "simd_x86", since = "1.27.0")]
295297
pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
296-
transmute(pmaxuw(a.as_u16x8(), b.as_u16x8()))
298+
let a = a.as_u16x8();
299+
let b = b.as_u16x8();
300+
transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
297301
}
298302

299303
/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
@@ -305,7 +309,9 @@ pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
305309
#[cfg_attr(test, assert_instr(pmaxsd))]
306310
#[stable(feature = "simd_x86", since = "1.27.0")]
307311
pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
308-
transmute(pmaxsd(a.as_i32x4(), b.as_i32x4()))
312+
let a = a.as_i32x4();
313+
let b = b.as_i32x4();
314+
transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
309315
}
310316

311317
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
@@ -317,7 +323,9 @@ pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
317323
#[cfg_attr(test, assert_instr(pmaxud))]
318324
#[stable(feature = "simd_x86", since = "1.27.0")]
319325
pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
320-
transmute(pmaxud(a.as_u32x4(), b.as_u32x4()))
326+
let a = a.as_u32x4();
327+
let b = b.as_u32x4();
328+
transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
321329
}
322330

323331
/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
@@ -329,7 +337,9 @@ pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
329337
#[cfg_attr(test, assert_instr(pminsb))]
330338
#[stable(feature = "simd_x86", since = "1.27.0")]
331339
pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
332-
transmute(pminsb(a.as_i8x16(), b.as_i8x16()))
340+
let a = a.as_i8x16();
341+
let b = b.as_i8x16();
342+
transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
333343
}
334344

335345
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
@@ -341,7 +351,9 @@ pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
341351
#[cfg_attr(test, assert_instr(pminuw))]
342352
#[stable(feature = "simd_x86", since = "1.27.0")]
343353
pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
344-
transmute(pminuw(a.as_u16x8(), b.as_u16x8()))
354+
let a = a.as_u16x8();
355+
let b = b.as_u16x8();
356+
transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
345357
}
346358

347359
/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
@@ -353,7 +365,9 @@ pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
353365
#[cfg_attr(test, assert_instr(pminsd))]
354366
#[stable(feature = "simd_x86", since = "1.27.0")]
355367
pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
356-
transmute(pminsd(a.as_i32x4(), b.as_i32x4()))
368+
let a = a.as_i32x4();
369+
let b = b.as_i32x4();
370+
transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
357371
}
358372

359373
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
@@ -365,7 +379,9 @@ pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
365379
#[cfg_attr(test, assert_instr(pminud))]
366380
#[stable(feature = "simd_x86", since = "1.27.0")]
367381
pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
368-
transmute(pminud(a.as_u32x4(), b.as_u32x4()))
382+
let a = a.as_u32x4();
383+
let b = b.as_u32x4();
384+
transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
369385
}
370386

371387
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
@@ -1122,22 +1138,6 @@ extern "C" {
11221138
fn pblendw(a: i16x8, b: i16x8, imm8: u8) -> i16x8;
11231139
#[link_name = "llvm.x86.sse41.insertps"]
11241140
fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
1125-
#[link_name = "llvm.x86.sse41.pmaxsb"]
1126-
fn pmaxsb(a: i8x16, b: i8x16) -> i8x16;
1127-
#[link_name = "llvm.x86.sse41.pmaxuw"]
1128-
fn pmaxuw(a: u16x8, b: u16x8) -> u16x8;
1129-
#[link_name = "llvm.x86.sse41.pmaxsd"]
1130-
fn pmaxsd(a: i32x4, b: i32x4) -> i32x4;
1131-
#[link_name = "llvm.x86.sse41.pmaxud"]
1132-
fn pmaxud(a: u32x4, b: u32x4) -> u32x4;
1133-
#[link_name = "llvm.x86.sse41.pminsb"]
1134-
fn pminsb(a: i8x16, b: i8x16) -> i8x16;
1135-
#[link_name = "llvm.x86.sse41.pminuw"]
1136-
fn pminuw(a: u16x8, b: u16x8) -> u16x8;
1137-
#[link_name = "llvm.x86.sse41.pminsd"]
1138-
fn pminsd(a: i32x4, b: i32x4) -> i32x4;
1139-
#[link_name = "llvm.x86.sse41.pminud"]
1140-
fn pminud(a: u32x4, b: u32x4) -> u32x4;
11411141
#[link_name = "llvm.x86.sse41.packusdw"]
11421142
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
11431143
#[link_name = "llvm.x86.sse41.dppd"]

0 commit comments

Comments
 (0)