Skip to content

Fix instruction assertions on LLVM 6 #321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion coresimd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
integer_atomics, stmt_expr_attributes, core_intrinsics,
crate_in_paths)]
#![cfg_attr(test, feature(proc_macro, test, attr_literals))]
#![cfg_attr(test, feature(proc_macro, test, attr_literals, abi_vectorcall))]
#![cfg_attr(feature = "cargo-clippy",
allow(inline_always, too_many_arguments, cast_sign_loss,
cast_lossless, cast_possible_wrap,
Expand Down
2 changes: 0 additions & 2 deletions coresimd/src/x86/i586/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1921,7 +1921,6 @@ pub unsafe fn _mm256_set_epi32(
#[inline]
#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
_mm256_setr_epi64x(d, c, b, a)
}
Expand Down Expand Up @@ -2001,7 +2000,6 @@ pub unsafe fn _mm256_setr_epi32(
#[inline]
#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
mem::transmute(i64x4::new(a, b, c, d))
}
Expand Down
18 changes: 9 additions & 9 deletions coresimd/src/x86/i586/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))]
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x4();
Expand Down Expand Up @@ -258,7 +258,7 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))]
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
pub unsafe fn _mm256_blend_epi32(
a: __m256i, b: __m256i, imm8: i32
) -> __m256i {
Expand Down Expand Up @@ -1790,15 +1790,15 @@ pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
/// integers of `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermd))]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
mem::transmute(permd(a.as_u32x8(), b.as_u32x8()))
}

/// Permutes 64-bit integers from `a` using control mask `imm8`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermq, imm8 = 9))]
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))]
pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
let imm8 = (imm8 & 0xFF) as u8;
let zero = _mm256_setzero_si256().as_i64x4();
Expand Down Expand Up @@ -2007,7 +2007,7 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
/// ```
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
// simd_shuffleX requires that its selector parameter be made up of
// constant values, but we can't enforce that here. In spirit, we need
Expand Down Expand Up @@ -2762,7 +2762,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
/// ```
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
let r: i32x8 = simd_shuffle8(
a.as_i32x8(),
Expand Down Expand Up @@ -2802,7 +2802,7 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
/// ```
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
let r: i32x8 =
simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
Expand Down Expand Up @@ -2839,7 +2839,7 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
/// ```
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
mem::transmute(r)
Expand Down Expand Up @@ -2875,7 +2875,7 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
/// ```
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
mem::transmute(r)
Expand Down
15 changes: 4 additions & 11 deletions coresimd/src/x86/i586/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -832,8 +832,7 @@ pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
/// lower half of result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, not(windows)), assert_instr(movhlps))]
#[cfg_attr(all(test, windows), assert_instr(unpckhpd))]
#[cfg_attr(test, assert_instr(movhlps))]
pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
// TODO: figure out why this is a different instruction on Windows.
simd_shuffle4(a, b, [6, 7, 2, 3])
Expand All @@ -843,8 +842,7 @@ pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
/// higher half of result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
#[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))]
#[cfg_attr(test, assert_instr(movlhps))]
pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
simd_shuffle4(a, b, [0, 1, 4, 5])
}
Expand Down Expand Up @@ -900,7 +898,7 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
// 32-bit codegen does not generate `movhps` or `movhpd`, but instead
// `movsd` followed by `unpcklpd` (or `movss`/`unpcklps` if there's no SSE2).
#[cfg_attr(all(test, target_arch = "x86", target_feature = "sse2"),
assert_instr(unpcklpd))]
assert_instr(movlhps))]
#[cfg_attr(all(test, target_arch = "x86", not(target_feature = "sse2")),
assert_instr(unpcklps))]
// TODO: This function is actually not limited to floats, but that's what
Expand Down Expand Up @@ -1095,13 +1093,8 @@ pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
#[inline]
#[target_feature(enable = "sse")]
// On i586 the codegen just generates plain MOVs. No need to test for that.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2"),
not(target_family = "windows")),
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(movlps))]
// Win64 passes `a` by reference, which causes it to generate two 64 bit moves.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2"),
target_family = "windows"),
assert_instr(movsd))]
pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) {
#[cfg(target_arch = "x86")]
{
Expand Down
17 changes: 8 additions & 9 deletions coresimd/src/x86/i586/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
/// Return the lowest element of `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movd))] // FIXME mov on windows
#[cfg_attr(test, assert_instr(movd))]
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
simd_extract(a.as_i32x4(), 0)
}
Expand Down Expand Up @@ -1207,7 +1207,7 @@ pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
/// Unpack and interleave 32-bit integers from the high half of `a` and `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhdq))]
#[cfg_attr(test, assert_instr(unpckhps))]
pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute::<i32x4, _>(simd_shuffle4(
a.as_i32x4(),
Expand All @@ -1219,7 +1219,7 @@ pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
/// Unpack and interleave 64-bit integers from the high half of `a` and `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhqdq))]
#[cfg_attr(test, assert_instr(unpckhpd))]
pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
mem::transmute::<i64x2, _>(simd_shuffle2(
a.as_i64x2(),
Expand Down Expand Up @@ -1253,7 +1253,7 @@ pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
/// Unpack and interleave 32-bit integers from the low half of `a` and `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckldq))]
#[cfg_attr(test, assert_instr(unpcklps))]
pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute::<i32x4, _>(simd_shuffle4(
a.as_i32x4(),
Expand All @@ -1265,7 +1265,7 @@ pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
/// Unpack and interleave 64-bit integers from the low half of `a` and `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklqdq))]
#[cfg_attr(test, assert_instr(movlhps))]
pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
mem::transmute::<i64x2, _>(simd_shuffle2(
a.as_i64x2(),
Expand Down Expand Up @@ -1795,7 +1795,6 @@ pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
/// Return the lower double-precision (64-bit) floating-point element of "a".
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, windows), assert_instr(movsd))] // FIXME movq/movlps/mov on other platform
pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
simd_extract(a, 0)
}
Expand Down Expand Up @@ -1953,7 +1952,7 @@ pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
/// memory location.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movsd only on windows
#[cfg_attr(test, assert_instr(movlps))]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
*mem_addr = simd_extract(a, 0)
}
Expand Down Expand Up @@ -2022,7 +2021,7 @@ pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
/// memory location.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movlpd (movsd on windows)
#[cfg_attr(test, assert_instr(movlps))] // FIXME movlpd
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
*mem_addr = simd_extract(a, 0);
}
Expand Down Expand Up @@ -2179,7 +2178,7 @@ pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
/// * The [63:0] bits are copied from the [63:0] bits of the first input
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpcklpd))]
#[cfg_attr(test, assert_instr(movlhps))]
pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
simd_shuffle2(a, b, [0, 2])
}
Expand Down
4 changes: 2 additions & 2 deletions coresimd/src/x86/i586/sse41.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
#[inline]
#[target_feature(enable = "sse4.1")]
// TODO: Add test for Windows
#[cfg_attr(all(test, not(windows)), assert_instr(extractps, imm8 = 0))]
#[cfg_attr(test, assert_instr(extractps, imm8 = 0))]
pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
mem::transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11))
}
Expand All @@ -141,7 +141,7 @@ pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
#[inline]
#[target_feature(enable = "sse4.1")]
// TODO: Add test for Windows
#[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8 = 1))]
#[cfg_attr(test, assert_instr(extractps, imm8 = 1))]
pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
let imm8 = (imm8 & 3) as u32;
simd_extract::<_, i32>(a.as_i32x4(), imm8)
Expand Down
2 changes: 1 addition & 1 deletion coresimd/src/x86/x86_64/sse41.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use stdsimd_test::assert_instr;
#[inline]
#[target_feature(enable = "sse4.1")]
// TODO: Add test for Windows
#[cfg_attr(all(test, not(windows)), assert_instr(pextrq, imm8 = 1))]
#[cfg_attr(test, assert_instr(pextrq, imm8 = 1))]
pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
let imm8 = (imm8 & 1) as u32;
simd_extract(a.as_i64x2(), imm8)
Expand Down
10 changes: 9 additions & 1 deletion stdsimd-test/assert-instr-macro/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,17 @@ pub fn assert_instr(
})
.collect::<Vec<_>>();
let attrs = Append(&attrs);

// Use an ABI on Windows that passes SIMD values in registers, like what
// happens on Unix (I think?) by default.
let abi = if cfg!(windows) {
syn::LitStr::new("vectorcall", proc_macro2::Span::call_site())
} else {
syn::LitStr::new("C", proc_macro2::Span::call_site())
};
let to_test = quote! {
#attrs
unsafe extern fn #shim_name(#(#inputs),*) #ret {
unsafe extern #abi fn #shim_name(#(#inputs),*) #ret {
#name(#(#input_vals),*)
}
};
Expand Down