Skip to content

Commit 8511163

Browse files
[AArch64][SVE] Fix definition of bfloat fcvt intrinsics.
Affected intrinsics: llvm.aarch64.sve.fcvt.bf16f32 llvm.aarch64.sve.fcvtnt.bf16f32 The named intrinsics took a predicate based on the smallest element type when it should be based on the largest. The intrinsics have been replace by v2 equivalents and affected code ported to use them. Patch includes changes to getSVEPredicateBitCast() that ensure the generated code for the auto-upgraded old intrinsics is unchanged.
1 parent 5bbbaa1 commit 8511163

File tree

12 files changed

+115
-53
lines changed

12 files changed

+115
-53
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -943,11 +943,6 @@ defm SVFCVTZS_S64_F16 : SInstCvtMXZ<"svcvt_s64[_f16]", "ddPO", "dPO", "l", "aar
943943
defm SVFCVTZS_S32_F32 : SInstCvtMXZ<"svcvt_s32[_f32]", "ddPM", "dPM", "i", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
944944
defm SVFCVTZS_S64_F32 : SInstCvtMXZ<"svcvt_s64[_f32]", "ddPM", "dPM", "l", "aarch64_sve_fcvtzs_i64f32">;
945945

946-
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
947-
defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "ddPM", "dPM", "b", "aarch64_sve_fcvt_bf16f32">;
948-
def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b", MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone, VerifyRuntimeMode]>;
949-
}
950-
951946
// svcvt_s##_f64
952947
defm SVFCVTZS_S32_F64 : SInstCvtMXZ<"svcvt_s32[_f64]", "ttPd", "tPd", "d", "aarch64_sve_fcvtzs_i32f64">;
953948
defm SVFCVTZS_S64_F64 : SInstCvtMXZ<"svcvt_s64[_f64]", "ddPN", "dPN", "l", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
@@ -1003,6 +998,13 @@ defm SVFCVT_F32_F64 : SInstCvtMXZ<"svcvt_f32[_f64]", "MMPd", "MPd", "d", "aarc
1003998
defm SVFCVT_F64_F16 : SInstCvtMXZ<"svcvt_f64[_f16]", "ddPO", "dPO", "d", "aarch64_sve_fcvt_f64f16">;
1004999
defm SVFCVT_F64_F32 : SInstCvtMXZ<"svcvt_f64[_f32]", "ddPM", "dPM", "d", "aarch64_sve_fcvt_f64f32">;
10051000

1001+
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
1002+
defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "$$Pd", "$Pd", "f", "aarch64_sve_fcvt_bf16f32_v2">;
1003+
1004+
def SVCVTNT_BF16 : SInst<"svcvtnt_bf16[_f32]", "$$Pd", "f", MergeOp1, "aarch64_sve_fcvtnt_bf16f32_v2", [IsOverloadNone, VerifyRuntimeMode]>;
1005+
// SVCVTNT_X_BF16 : Implemented as macro by SveEmitter.cpp
1006+
}
1007+
10061008
let SVETargetGuard = "sve2", SMETargetGuard = "sme" in {
10071009
defm SVCVTLT_F32 : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
10081010
defm SVCVTLT_F64 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
@@ -1015,7 +1017,6 @@ def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch6
10151017

10161018
def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
10171019
// SVCVTXNT_X_F32 : Implemented as macro by SveEmitter.cpp
1018-
10191020
}
10201021

10211022
////////////////////////////////////////////////////////////////////////////////

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@
2424

2525
// CHECK-LABEL: @test_svcvt_bf16_f32_x(
2626
// CHECK-NEXT: entry:
27-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
28-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
27+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
28+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
2929
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3030
//
3131
// CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_xu10__SVBool_tu13__SVFloat32_t(
3232
// CPP-CHECK-NEXT: entry:
33-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
34-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
33+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
34+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
3535
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3636
//
3737
svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -40,14 +40,14 @@ svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR {
4040

4141
// CHECK-LABEL: @test_svcvt_bf16_f32_z(
4242
// CHECK-NEXT: entry:
43-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
44-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
44+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
4545
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
4646
//
4747
// CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_zu10__SVBool_tu13__SVFloat32_t(
4848
// CPP-CHECK-NEXT: entry:
49-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
50-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
49+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
50+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
5151
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
5252
//
5353
svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -56,14 +56,14 @@ svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR {
5656

5757
// CHECK-LABEL: @test_svcvt_bf16_f32_m(
5858
// CHECK-NEXT: entry:
59-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
60-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
59+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
60+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
6161
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6262
//
6363
// CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_mu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
6464
// CPP-CHECK-NEXT: entry:
65-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
66-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
65+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
66+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
6767
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6868
//
6969
svbfloat16_t test_svcvt_bf16_f32_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR {

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@
2424

2525
// CHECK-LABEL: @test_svcvtnt_bf16_f32_x(
2626
// CHECK-NEXT: entry:
27-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
28-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
27+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
28+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
2929
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3030
//
3131
// CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_xu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
3232
// CPP-CHECK-NEXT: entry:
33-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
34-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
33+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
34+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
3535
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3636
//
3737
svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -40,14 +40,14 @@ svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t
4040

4141
// CHECK-LABEL: @test_svcvtnt_bf16_f32_m(
4242
// CHECK-NEXT: entry:
43-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
44-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
44+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
4545
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
4646
//
4747
// CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_mu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
4848
// CPP-CHECK-NEXT: entry:
49-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
50-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
49+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
50+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
5151
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
5252
//
5353
svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2177,8 +2177,8 @@ def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1
21772177
def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
21782178
def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
21792179

2180-
def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
2181-
def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
2180+
def int_aarch64_sve_fcvt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
2181+
def int_aarch64_sve_fcvtnt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
21822182

21832183
def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
21842184
def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
846846
return false; // No other 'aarch64.sve.bf*'.
847847
}
848848

849+
// 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
850+
if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
851+
NewFn = nullptr;
852+
return true;
853+
}
854+
849855
if (Name.consume_front("addqv")) {
850856
// 'aarch64.sve.addqv'.
851857
if (!F->getReturnType()->isFPOrFPVectorTy())
@@ -4053,6 +4059,30 @@ static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
40534059
return Rep;
40544060
}
40554061

4062+
static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4063+
Function *F, IRBuilder<> &Builder) {
4064+
Intrinsic::ID NewID =
4065+
StringSwitch<Intrinsic::ID>(Name)
4066+
.Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4067+
.Case("sve.fcvtnt.bf16f32", Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4068+
.Default(Intrinsic::not_intrinsic);
4069+
if (NewID == Intrinsic::not_intrinsic)
4070+
llvm_unreachable("Unhandled Intrinsic!");
4071+
4072+
SmallVector<Value *, 3> Args(CI->args());
4073+
4074+
// The original intrinsics incorrectly used a predicated based on the smallest
4075+
// element type rather than the largest.
4076+
Type *PredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4077+
Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4078+
Args[1]->getType(), Args[1]);
4079+
Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
4080+
PredTy, Args[1]);
4081+
4082+
Function *NewF = Intrinsic::getDeclaration(CI->getModule(), NewID);
4083+
return Builder.CreateCall(NewF, Args, CI->getName());
4084+
}
4085+
40564086
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
40574087
IRBuilder<> &Builder) {
40584088
if (Name == "mve.vctp64.old") {
@@ -4306,6 +4336,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
43064336

43074337
bool IsX86 = Name.consume_front("x86.");
43084338
bool IsNVVM = Name.consume_front("nvvm.");
4339+
bool IsAArch64 = Name.consume_front("aarch64.");
43094340
bool IsARM = Name.consume_front("arm.");
43104341
bool IsAMDGCN = Name.consume_front("amdgcn.");
43114342
bool IsDbg = Name.consume_front("dbg.");
@@ -4317,6 +4348,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
43174348
Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
43184349
} else if (IsX86) {
43194350
Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4351+
} else if (IsAArch64) {
4352+
Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
43204353
} else if (IsARM) {
43214354
Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
43224355
} else if (IsAMDGCN) {

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5551,6 +5551,14 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
55515551
if (InVT == VT)
55525552
return Op;
55535553

5554+
// Look through casts to <n x 16 x i1> when their input has more lanes than
5555+
// VT. This will increase the chances of removing casts that introduce new
5556+
// lanes, which have to be explicitly zero'd.
5557+
if (Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
5558+
Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
5559+
Op.getOperand(1).getValueType().bitsGT(VT))
5560+
Op = Op.getOperand(1);
5561+
55545562
SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
55555563

55565564
// We only have to zero the lanes if new lanes are being defined, e.g. when

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2425,8 +2425,8 @@ let Predicates = [HasBF16, HasSVEorSME] in {
24252425
defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
24262426
defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
24272427
defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
2428-
defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32, AArch64fcvtr_mt>;
2429-
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
2428+
defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
2429+
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
24302430
} // End HasBF16, HasSVEorSME
24312431

24322432
let Predicates = [HasSVEorSME] in {

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2147,7 +2147,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21472147
switch (IID) {
21482148
default:
21492149
break;
2150-
case Intrinsic::aarch64_sve_fcvt_bf16f32:
2150+
case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
21512151
case Intrinsic::aarch64_sve_fcvt_f16f32:
21522152
case Intrinsic::aarch64_sve_fcvt_f16f64:
21532153
case Intrinsic::aarch64_sve_fcvt_f32f16:
@@ -2178,7 +2178,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21782178
case Intrinsic::aarch64_sve_ucvtf_f32i64:
21792179
case Intrinsic::aarch64_sve_ucvtf_f64i32:
21802180
return instCombineSVEAllOrNoActiveUnary(IC, II);
2181-
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
2181+
case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
21822182
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
21832183
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
21842184
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8811,7 +8811,7 @@ multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
88118811
SDPatternOperator ir_op = null_frag> {
88128812
def NAME : sve_bfloat_convert<N, asm>;
88138813

8814-
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i1, nxv4f32, !cast<Instruction>(NAME)>;
8814+
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
88158815
def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
88168816
def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
88178817
}

0 commit comments

Comments
 (0)