Skip to content

Commit 7e9747b

Browse files
committed
[X86][F16C] Remove cvtph2ps intrinsics and use generic half2float conversion (PR37554)
This removes every cvtph2ps intrinsic except int_x86_avx512_mask_vcvtph2ps_512, which is kept because it provides the SAE variant; even that one can use the generic fpext when the rounding control is the default. Differential Revision: https://reviews.llvm.org/D75162
1 parent 777e97c commit 7e9747b

21 files changed

+744
-573
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10327,6 +10327,46 @@ Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
1032710327
return EmitX86CpuIs(CPUStr);
1032810328
}
1032910329

10330+
// Convert F16 halfs to floats.
10331+
static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
10332+
ArrayRef<Value *> Ops,
10333+
llvm::Type *DstTy) {
10334+
assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
10335+
"Unknown cvtph2ps intrinsic");
10336+
10337+
// If the SAE intrinsic doesn't use default rounding then we can't upgrade.
10338+
if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
10339+
Intrinsic::ID IID = Intrinsic::x86_avx512_mask_vcvtph2ps_512;
10340+
Function *F =
10341+
CGF.CGM.getIntrinsic(IID, {DstTy, Ops[0]->getType(), Ops[1]->getType(),
10342+
Ops[2]->getType(), Ops[3]->getType()});
10343+
return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
10344+
}
10345+
10346+
unsigned NumDstElts = DstTy->getVectorNumElements();
10347+
Value *Src = Ops[0];
10348+
10349+
// Extract the subvector.
10350+
if (NumDstElts != Src->getType()->getVectorNumElements()) {
10351+
assert(NumDstElts == 4 && "Unexpected vector size");
10352+
uint32_t ShuffleMask[4] = {0, 1, 2, 3};
10353+
Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()),
10354+
ShuffleMask);
10355+
}
10356+
10357+
// Bitcast from vXi16 to vXf16.
10358+
llvm::Type *HalfTy = llvm::VectorType::get(
10359+
llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
10360+
Src = CGF.Builder.CreateBitCast(Src, HalfTy);
10361+
10362+
// Perform the fp-extension.
10363+
Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
10364+
10365+
if (Ops.size() >= 3)
10366+
Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
10367+
return Res;
10368+
}
10369+
1033010370
// Convert a BF16 to a float.
1033110371
static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF,
1033210372
const CallExpr *E,
@@ -12531,6 +12571,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1253112571
case X86::BI__builtin_ia32_cmpordsd:
1253212572
return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
1253312573

12574+
// f16c half2float intrinsics
12575+
case X86::BI__builtin_ia32_vcvtph2ps:
12576+
case X86::BI__builtin_ia32_vcvtph2ps256:
12577+
case X86::BI__builtin_ia32_vcvtph2ps_mask:
12578+
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
12579+
case X86::BI__builtin_ia32_vcvtph2ps512_mask:
12580+
return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
12581+
1253412582
// AVX512 bf16 intrinsics
1253512583
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
1253612584
Ops[2] = getMaskVecValue(*this, Ops[2],

clang/test/CodeGen/avx512f-builtins-constrained.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,21 +171,32 @@ __m128 test_mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B){
171171
__m512 test_mm512_cvtph_ps (__m256i __A)
172172
{
173173
// COMMON-LABEL: test_mm512_cvtph_ps
174-
// COMMONIR: @llvm.x86.avx512.mask.vcvtph2ps.512
174+
// COMMONIR: bitcast <4 x i64> %{{.*}} to <16 x i16>
175+
// COMMONIR: bitcast <16 x i16> %{{.*}} to <16 x half>
176+
// UNCONSTRAINED: fpext <16 x half> %{{.*}} to <16 x float>
177+
// CONSTRAINED: call <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(<16 x half> %{{.*}}, metadata !"fpexcept.strict")
175178
return _mm512_cvtph_ps (__A);
176179
}
177180

178181
__m512 test_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
179182
{
180183
// COMMON-LABEL: test_mm512_mask_cvtph_ps
181-
// COMMONIR: @llvm.x86.avx512.mask.vcvtph2ps.512
184+
// COMMONIR: bitcast <4 x i64> %{{.*}} to <16 x i16>
185+
// COMMONIR: bitcast <16 x i16> %{{.*}} to <16 x half>
186+
// UNCONSTRAINED: fpext <16 x half> %{{.*}} to <16 x float>
187+
// CONSTRAINED: call <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(<16 x half> %{{.*}}, metadata !"fpexcept.strict")
188+
// COMMONIR: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
182189
return _mm512_mask_cvtph_ps (__W,__U,__A);
183190
}
184191

185192
__m512 test_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
186193
{
187194
// COMMON-LABEL: test_mm512_maskz_cvtph_ps
188-
// COMMONIR: @llvm.x86.avx512.mask.vcvtph2ps.512
195+
// COMMONIR: bitcast <4 x i64> %{{.*}} to <16 x i16>
196+
// COMMONIR: bitcast <16 x i16> %{{.*}} to <16 x half>
197+
// UNCONSTRAINED: fpext <16 x half> %{{.*}} to <16 x float>
198+
// CONSTRAINED: call <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(<16 x half> %{{.*}}, metadata !"fpexcept.strict")
199+
// COMMONIR: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
189200
return _mm512_maskz_cvtph_ps (__U,__A);
190201
}
191202

clang/test/CodeGen/avx512f-builtins.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9463,21 +9463,29 @@ __m256 test_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
94639463
__m512 test_mm512_cvtph_ps (__m256i __A)
94649464
{
94659465
// CHECK-LABEL: @test_mm512_cvtph_ps
9466-
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512
9466+
// CHECK: bitcast <4 x i64> %{{.*}} to <16 x i16>
9467+
// CHECK: bitcast <16 x i16> %{{.*}} to <16 x half>
9468+
// CHECK: fpext <16 x half> %{{.*}} to <16 x float>
94679469
return _mm512_cvtph_ps (__A);
94689470
}
94699471

94709472
__m512 test_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
94719473
{
94729474
// CHECK-LABEL: @test_mm512_mask_cvtph_ps
9473-
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512
9475+
// CHECK: bitcast <4 x i64> %{{.*}} to <16 x i16>
9476+
// CHECK: bitcast <16 x i16> %{{.*}} to <16 x half>
9477+
// CHECK: fpext <16 x half> %{{.*}} to <16 x float>
9478+
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
94749479
return _mm512_mask_cvtph_ps (__W,__U,__A);
94759480
}
94769481

94779482
__m512 test_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
94789483
{
94799484
// CHECK-LABEL: @test_mm512_maskz_cvtph_ps
9480-
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512
9485+
// CHECK: bitcast <4 x i64> %{{.*}} to <16 x i16>
9486+
// CHECK: bitcast <16 x i16> %{{.*}} to <16 x half>
9487+
// CHECK: fpext <16 x half> %{{.*}} to <16 x float>
9488+
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
94819489
return _mm512_maskz_cvtph_ps (__U,__A);
94829490
}
94839491

clang/test/CodeGen/avx512vl-builtins-constrained.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,43 @@
88

99
__m128 test_mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) {
1010
// COMMON-LABEL: @test_mm_mask_cvtph_ps
11-
// COMMONIR: @llvm.x86.avx512.mask.vcvtph2ps.128
11+
// COMMONIR: bitcast <2 x i64> %{{.*}} to <8 x i16>
12+
// COMMONIR: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13+
// COMMONIR: bitcast <4 x i16> %{{.*}} to <4 x half>
14+
// UNCONSTRAINED: fpext <4 x half> %{{.*}} to <4 x float>
15+
// CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
16+
// COMMONIR: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
1217
return _mm_mask_cvtph_ps(__W, __U, __A);
1318
}
1419

1520
__m128 test_mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A) {
1621
// COMMON-LABEL: @test_mm_maskz_cvtph_ps
17-
// COMMONIR: @llvm.x86.avx512.mask.vcvtph2ps.128
22+
// COMMONIR: bitcast <2 x i64> %{{.*}} to <8 x i16>
23+
// COMMONIR: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
24+
// COMMONIR: bitcast <4 x i16> %{{.*}} to <4 x half>
25+
// UNCONSTRAINED: fpext <4 x half> %{{.*}} to <4 x float>
26+
// CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
27+
// COMMONIR: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
1828
return _mm_maskz_cvtph_ps(__U, __A);
1929
}
2030

2131
__m256 test_mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A) {
2232
// COMMON-LABEL: @test_mm256_mask_cvtph_ps
23-
// COMMONIR: @llvm.x86.avx512.mask.vcvtph2ps.256
33+
// COMMONIR: bitcast <2 x i64> %{{.*}} to <8 x i16>
34+
// COMMONIR: bitcast <8 x i16> %{{.*}} to <8 x half>
35+
// UNCONSTRAINED: fpext <8 x half> %{{.*}} to <8 x float>
36+
// CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half> %{{.*}}, metadata !"fpexcept.strict")
37+
// COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
2438
return _mm256_mask_cvtph_ps(__W, __U, __A);
2539
}
2640

2741
__m256 test_mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A) {
2842
// COMMON-LABEL: @test_mm256_maskz_cvtph_ps
29-
// COMMONIR: @llvm.x86.avx512.mask.vcvtph2ps.256
43+
// COMMONIR: bitcast <2 x i64> %{{.*}} to <8 x i16>
44+
// COMMONIR: bitcast <8 x i16> %{{.*}} to <8 x half>
45+
// UNCONSTRAINED: fpext <8 x half> %{{.*}} to <8 x float>
46+
// CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half> %{{.*}}, metadata !"fpexcept.strict")
47+
// COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
3048
return _mm256_maskz_cvtph_ps(__U, __A);
3149
}
3250

clang/test/CodeGen/avx512vl-builtins.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9692,25 +9692,39 @@ __m256 test_mm256_maskz_mov_ps(__mmask8 __U, __m256 __A) {
96929692

96939693
__m128 test_mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) {
96949694
// CHECK-LABEL: @test_mm_mask_cvtph_ps
9695-
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.128
9695+
// CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
9696+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9697+
// CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
9698+
// CHECK: fpext <4 x half> %{{.*}} to <4 x float>
9699+
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
96969700
return _mm_mask_cvtph_ps(__W, __U, __A);
96979701
}
96989702

96999703
__m128 test_mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A) {
97009704
// CHECK-LABEL: @test_mm_maskz_cvtph_ps
9701-
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.128
9705+
// CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
9706+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9707+
// CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
9708+
// CHECK: fpext <4 x half> %{{.*}} to <4 x float>
9709+
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
97029710
return _mm_maskz_cvtph_ps(__U, __A);
97039711
}
97049712

97059713
__m256 test_mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A) {
97069714
// CHECK-LABEL: @test_mm256_mask_cvtph_ps
9707-
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.256
9715+
// CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
9716+
// CHECK: bitcast <8 x i16> %{{.*}} to <8 x half>
9717+
// CHECK: fpext <8 x half> %{{.*}} to <8 x float>
9718+
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
97089719
return _mm256_mask_cvtph_ps(__W, __U, __A);
97099720
}
97109721

97119722
__m256 test_mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A) {
97129723
// CHECK-LABEL: @test_mm256_maskz_cvtph_ps
9713-
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.256
9724+
// CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
9725+
// CHECK: bitcast <8 x i16> %{{.*}} to <8 x half>
9726+
// CHECK: fpext <8 x half> %{{.*}} to <8 x float>
9727+
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
97149728
return _mm256_maskz_cvtph_ps(__U, __A);
97159729
}
97169730

clang/test/CodeGen/f16c-builtins-constrained.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ float test_cvtsh_ss(unsigned short a) {
1313
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
1414
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
1515
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
16-
// CHECK: call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %{{.*}})
16+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17+
// CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
18+
// CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
1719
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
1820
return _cvtsh_ss(a);
1921
}
@@ -34,13 +36,18 @@ unsigned short test_cvtss_sh(float a) {
3436

3537
__m128 test_mm_cvtph_ps(__m128i a) {
3638
// CHECK-LABEL: test_mm_cvtph_ps
37-
// CHECK: call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %{{.*}})
39+
// CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
40+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
41+
// CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
42+
// CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
3843
return _mm_cvtph_ps(a);
3944
}
4045

4146
__m256 test_mm256_cvtph_ps(__m128i a) {
4247
// CHECK-LABEL: test_mm256_cvtph_ps
43-
// CHECK: call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %{{.*}})
48+
// CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
49+
// CHECK: bitcast <8 x i16> %{{.*}} to <8 x half>
50+
// CHECK: call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half> %{{.*}}, metadata !"fpexcept.strict")
4451
return _mm256_cvtph_ps(a);
4552
}
4653

clang/test/CodeGen/f16c-builtins.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ float test_cvtsh_ss(unsigned short a) {
1313
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
1414
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
1515
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
16-
// CHECK: call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %{{.*}})
16+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17+
// CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
18+
// CHECK: fpext <4 x half> %{{.*}} to <4 x float>
1719
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
1820
return _cvtsh_ss(a);
1921
}
@@ -31,13 +33,18 @@ unsigned short test_cvtss_sh(float a) {
3133

3234
__m128 test_mm_cvtph_ps(__m128i a) {
3335
// CHECK-LABEL: test_mm_cvtph_ps
34-
// CHECK: call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %{{.*}})
36+
// CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
37+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
38+
// CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
39+
// CHECK: fpext <4 x half> %{{.*}} to <4 x float>
3540
return _mm_cvtph_ps(a);
3641
}
3742

3843
__m256 test_mm256_cvtph_ps(__m128i a) {
3944
// CHECK-LABEL: test_mm256_cvtph_ps
40-
// CHECK: call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %{{.*}})
45+
// CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
46+
// CHECK: bitcast <8 x i16> %{{.*}} to <8 x half>
47+
// CHECK: fpext <8 x half> %{{.*}} to <8 x float>
4148
return _mm256_cvtph_ps(a);
4249
}
4350

llvm/include/llvm/IR/IntrinsicsX86.td

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2546,26 +2546,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
25462546
// Half float conversion
25472547

25482548
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
2549-
def int_x86_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps">,
2550-
Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
2551-
def int_x86_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256">,
2552-
Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
25532549
def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
25542550
Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
25552551
[IntrNoMem, ImmArg<1>]>;
25562552
def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
25572553
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
25582554
[IntrNoMem, ImmArg<1>]>;
2559-
def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">,
2555+
def int_x86_avx512_mask_vcvtph2ps_512 :
25602556
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty,
25612557
llvm_i16_ty, llvm_i32_ty],
25622558
[IntrNoMem, ImmArg<3>]>;
2563-
def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">,
2564-
Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty,
2565-
llvm_i8_ty], [IntrNoMem]>;
2566-
def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps_mask">,
2567-
Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_v4f32_ty,
2568-
llvm_i8_ty], [IntrNoMem]>;
25692559
def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
25702560
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty,
25712561
llvm_v16i16_ty, llvm_i16_ty],

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
204204
Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
205205
Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
206206
Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
207+
Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
208+
Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
207209
Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
208210
Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
209211
Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
@@ -316,6 +318,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
316318
Name == "avx.cvtdq2.pd.256" || // Added in 3.9
317319
Name == "avx.cvtdq2.ps.256" || // Added in 7.0
318320
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
321+
Name.startswith("vcvtph2ps.") || // Added in 11.0
319322
Name.startswith("avx.vinsertf128.") || // Added in 3.7
320323
Name == "avx2.vinserti128" || // Added in 3.7
321324
Name.startswith("avx512.mask.insert") || // Added in 4.0
@@ -2132,6 +2135,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
21322135
: Builder.CreateSIToFP(Rep, DstTy, "cvt");
21332136
}
21342137

2138+
if (CI->getNumArgOperands() >= 3)
2139+
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2140+
CI->getArgOperand(1));
2141+
} else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2142+
Name.startswith("vcvtph2ps."))) {
2143+
Type *DstTy = CI->getType();
2144+
Rep = CI->getArgOperand(0);
2145+
Type *SrcTy = Rep->getType();
2146+
unsigned NumDstElts = DstTy->getVectorNumElements();
2147+
if (NumDstElts != SrcTy->getVectorNumElements()) {
2148+
assert(NumDstElts == 4 && "Unexpected vector size");
2149+
uint32_t ShuffleMask[4] = {0, 1, 2, 3};
2150+
Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
2151+
}
2152+
Rep = Builder.CreateBitCast(
2153+
Rep, VectorType::get(Type::getHalfTy(C), NumDstElts));
2154+
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
21352155
if (CI->getNumArgOperands() >= 3)
21362156
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
21372157
CI->getArgOperand(1));

llvm/lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -783,10 +783,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
783783
X86ISD::FSUBS, X86ISD::FSUBS_RND),
784784
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK,
785785
X86ISD::FSUBS, X86ISD::FSUBS_RND),
786-
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK,
787-
X86ISD::CVTPH2PS, 0),
788-
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK,
789-
X86ISD::CVTPH2PS, 0),
790786
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_SAE,
791787
X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_SAE),
792788
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
@@ -1108,8 +1104,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
11081104
X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB),
11091105
X86_INTRINSIC_DATA(tbm_bextri_u32, BEXTRI, X86ISD::BEXTR, 0),
11101106
X86_INTRINSIC_DATA(tbm_bextri_u64, BEXTRI, X86ISD::BEXTR, 0),
1111-
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
1112-
X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
11131107
X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
11141108
X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
11151109

0 commit comments

Comments
 (0)