-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AVX10.2] Fix wrong mask casting in some convert intrinsics #126627
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AVX10.2] Fix wrong mask casting in some convert intrinsics #126627
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Mikołaj Piróg (mikolaj-pirog) Changes: Found during work on #120927. The `__mmask32` argument was cast to `__mmask16`, which caused the compiler to silently ignore the upper half of the mask in the specific intrinsics. Full diff: https://github.com/llvm/llvm-project/pull/126627.diff 2 Files Affected:
diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h
index c419323910f187a..07722090c30ee93 100644
--- a/clang/lib/Headers/avx10_2convertintrin.h
+++ b/clang/lib/Headers/avx10_2convertintrin.h
@@ -260,13 +260,13 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A,
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
return (__m256i)__builtin_ia32_selectb_256(
- (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W);
+ (__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
return (__m256i)__builtin_ia32_selectb_256(
- (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B),
+ (__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B),
(__v32qi)(__m256i)_mm256_setzero_si256());
}
@@ -297,13 +297,13 @@ _mm256_cvts2ph_bf8(__m256h __A, __m256h __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
return (__m256i)__builtin_ia32_selectb_256(
- (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W);
+ (__mmask32)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
return (__m256i)__builtin_ia32_selectb_256(
- (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B),
+ (__mmask32)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B),
(__v32qi)(__m256i)_mm256_setzero_si256());
}
@@ -334,13 +334,13 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A,
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
return (__m256i)__builtin_ia32_selectb_256(
- (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W);
+ (__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
return (__m256i)__builtin_ia32_selectb_256(
- (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B),
+ (__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B),
(__v32qi)(__m256i)_mm256_setzero_si256());
}
@@ -371,13 +371,13 @@ _mm256_cvts2ph_hf8(__m256h __A, __m256h __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
return (__m256i)__builtin_ia32_selectb_256(
- (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W);
+ (__mmask32)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
return (__m256i)__builtin_ia32_selectb_256(
- (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B),
+ (__mmask32)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B),
(__v32qi)(__m256i)_mm256_setzero_si256());
}
diff --git a/clang/test/CodeGen/X86/avx10_2convert-builtins.c b/clang/test/CodeGen/X86/avx10_2convert-builtins.c
index d73a223922ce03d..87fc6ffd7bc170a 100644
--- a/clang/test/CodeGen/X86/avx10_2convert-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2convert-builtins.c
@@ -231,7 +231,7 @@ __m256i test_mm256_cvt2ph_bf8(__m256h __A, __m256h __B) {
return _mm256_cvt2ph_bf8(__A, __B);
}
-__m256i test_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask16 __U, __m256h __A, __m256h __B) {
+__m256i test_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_mask_cvt2ph_bf8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2bf8256(
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
@@ -239,7 +239,7 @@ __m256i test_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask16 __U, __m256h __A, __m2
return _mm256_mask_cvt2ph_bf8(__W, __U, __A, __B);
}
-__m256i test_mm256_maskz_cvt2ph_bf8(__mmask16 __U, __m256h __A, __m256h __B) {
+__m256i test_mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_maskz_cvt2ph_bf8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2bf8256(
// CHECK: zeroinitializer
@@ -275,7 +275,7 @@ __m256i test_mm256_cvts2ph_bf8(__m256h __A, __m256h __B) {
return _mm256_cvts2ph_bf8(__A, __B);
}
-__m256i test_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask16 __U, __m256h __A, __m256h __B) {
+__m256i test_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_mask_cvts2ph_bf8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2bf8s256(
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
@@ -283,7 +283,7 @@ __m256i test_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask16 __U, __m256h __A, __m
return _mm256_mask_cvts2ph_bf8(__W, __U, __A, __B);
}
-__m256i test_mm256_maskz_cvts2ph_bf8(__mmask16 __U, __m256h __A, __m256h __B) {
+__m256i test_mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_maskz_cvts2ph_bf8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2bf8s256(
// CHECK: zeroinitializer
@@ -319,7 +319,7 @@ __m256i test_mm256_cvt2ph_hf8(__m256h __A, __m256h __B) {
return _mm256_cvt2ph_hf8(__A, __B);
}
-__m256i test_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask16 __U, __m256h __A, __m256h __B) {
+__m256i test_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_mask_cvt2ph_hf8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2hf8256(
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
@@ -327,7 +327,7 @@ __m256i test_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask16 __U, __m256h __A, __m2
return _mm256_mask_cvt2ph_hf8(__W, __U, __A, __B);
}
-__m256i test_mm256_maskz_cvt2ph_hf8(__mmask16 __U, __m256h __A, __m256h __B) {
+__m256i test_mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_maskz_cvt2ph_hf8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2hf8256(
// CHECK: zeroinitializer
@@ -363,7 +363,7 @@ __m256i test_mm256_cvts2ph_hf8(__m256h __A, __m256h __B) {
return _mm256_cvts2ph_hf8(__A, __B);
}
-__m256i test_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask16 __U, __m256h __A, __m256h __B) {
+__m256i test_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_mask_cvts2ph_hf8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2hf8s256(
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
@@ -371,7 +371,7 @@ __m256i test_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask16 __U, __m256h __A, __m
return _mm256_mask_cvts2ph_hf8(__W, __U, __A, __B);
}
-__m256i test_mm256_maskz_cvts2ph_hf8(__mmask16 __U, __m256h __A, __m256h __B) {
+__m256i test_mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
// CHECK-LABEL: @test_mm256_maskz_cvts2ph_hf8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2hf8s256(
// CHECK: zeroinitializer
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool!
/cherry-pick af522c5 |
/pull-request #126666 |
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/14428 Here is the relevant piece of the build log for reference:
|
The buildbot failure is unrelated to this change |
) Found during work on llvm#120927. This caused the compiler to silently ignore half of the mask in the specific intrinsics. (cherry picked from commit af522c5)
) Found during work on llvm#120927. This caused the compiler to silently ignore half of the mask in the specific intrinsics.
) Found during work on llvm#120927. This caused the compiler to silently ignore half of the mask in the specific intrinsics.
) Found during work on llvm#120927. This caused the compiler to silently ignore half of the mask in the specific intrinsics.
Found during work on #120927. This caused the compiler to silently ignore half of the mask in the specific intrinsics.