Skip to content

Commit 7fd66c7

Browse files
[AArch64][SVE] Fix definition of bfloat fcvt intrinsics.
Affected intrinsics: llvm.aarch64.sve.fcvt.bf16f32 llvm.aarch64.sve.fcvtnt.bf16f32 The named intrinsics took a predicate based on the smallest element type when it should be based on the largest. The intrinsics have been replace by v2 equivalents and affected code ported to use them. Patch includes changes to getSVEPredicateBitCast() that ensure the generated code for the auto-upgraded old intrinsics is unchanged.
1 parent 79ecb81 commit 7fd66c7

File tree

12 files changed

+115
-53
lines changed

12 files changed

+115
-53
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -943,11 +943,6 @@ defm SVFCVTZS_S64_F16 : SInstCvtMXZ<"svcvt_s64[_f16]", "ddPO", "dPO", "l", "aar
943943
defm SVFCVTZS_S32_F32 : SInstCvtMXZ<"svcvt_s32[_f32]", "ddPM", "dPM", "i", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
944944
defm SVFCVTZS_S64_F32 : SInstCvtMXZ<"svcvt_s64[_f32]", "ddPM", "dPM", "l", "aarch64_sve_fcvtzs_i64f32">;
945945

946-
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
947-
defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "ddPM", "dPM", "b", "aarch64_sve_fcvt_bf16f32">;
948-
def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b", MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone, VerifyRuntimeMode]>;
949-
}
950-
951946
// svcvt_s##_f64
952947
defm SVFCVTZS_S32_F64 : SInstCvtMXZ<"svcvt_s32[_f64]", "ttPd", "tPd", "d", "aarch64_sve_fcvtzs_i32f64">;
953948
defm SVFCVTZS_S64_F64 : SInstCvtMXZ<"svcvt_s64[_f64]", "ddPN", "dPN", "l", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
@@ -1003,6 +998,13 @@ defm SVFCVT_F32_F64 : SInstCvtMXZ<"svcvt_f32[_f64]", "MMPd", "MPd", "d", "aarc
1003998
defm SVFCVT_F64_F16 : SInstCvtMXZ<"svcvt_f64[_f16]", "ddPO", "dPO", "d", "aarch64_sve_fcvt_f64f16">;
1004999
defm SVFCVT_F64_F32 : SInstCvtMXZ<"svcvt_f64[_f32]", "ddPM", "dPM", "d", "aarch64_sve_fcvt_f64f32">;
10051000

1001+
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
1002+
defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "$$Pd", "$Pd", "f", "aarch64_sve_fcvt_bf16f32_v2">;
1003+
1004+
def SVCVTNT_BF16 : SInst<"svcvtnt_bf16[_f32]", "$$Pd", "f", MergeOp1, "aarch64_sve_fcvtnt_bf16f32_v2", [IsOverloadNone, VerifyRuntimeMode]>;
1005+
// SVCVTNT_X_BF16 : Implemented as macro by SveEmitter.cpp
1006+
}
1007+
10061008
let SVETargetGuard = "sve2", SMETargetGuard = "sme" in {
10071009
defm SVCVTLT_F32 : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
10081010
defm SVCVTLT_F64 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
@@ -1015,7 +1017,6 @@ def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch6
10151017

10161018
def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
10171019
// SVCVTXNT_X_F32 : Implemented as macro by SveEmitter.cpp
1018-
10191020
}
10201021

10211022
////////////////////////////////////////////////////////////////////////////////

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@
2424

2525
// CHECK-LABEL: @test_svcvt_bf16_f32_x(
2626
// CHECK-NEXT: entry:
27-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
28-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
27+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
28+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
2929
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3030
//
3131
// CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_xu10__SVBool_tu13__SVFloat32_t(
3232
// CPP-CHECK-NEXT: entry:
33-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
34-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
33+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
34+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
3535
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3636
//
3737
svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -40,14 +40,14 @@ svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR {
4040

4141
// CHECK-LABEL: @test_svcvt_bf16_f32_z(
4242
// CHECK-NEXT: entry:
43-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
44-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
44+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
4545
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
4646
//
4747
// CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_zu10__SVBool_tu13__SVFloat32_t(
4848
// CPP-CHECK-NEXT: entry:
49-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
50-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
49+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
50+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
5151
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
5252
//
5353
svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -56,14 +56,14 @@ svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR {
5656

5757
// CHECK-LABEL: @test_svcvt_bf16_f32_m(
5858
// CHECK-NEXT: entry:
59-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
60-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
59+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
60+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
6161
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6262
//
6363
// CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_mu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
6464
// CPP-CHECK-NEXT: entry:
65-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
66-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
65+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
66+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
6767
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6868
//
6969
svbfloat16_t test_svcvt_bf16_f32_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR {

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@
2424

2525
// CHECK-LABEL: @test_svcvtnt_bf16_f32_x(
2626
// CHECK-NEXT: entry:
27-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
28-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
27+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
28+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
2929
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3030
//
3131
// CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_xu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
3232
// CPP-CHECK-NEXT: entry:
33-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
34-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
33+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
34+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
3535
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3636
//
3737
svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -40,14 +40,14 @@ svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t
4040

4141
// CHECK-LABEL: @test_svcvtnt_bf16_f32_m(
4242
// CHECK-NEXT: entry:
43-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
44-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
44+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
4545
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
4646
//
4747
// CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_mu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
4848
// CPP-CHECK-NEXT: entry:
49-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
50-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
49+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
50+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
5151
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
5252
//
5353
svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2184,8 +2184,8 @@ def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1
21842184
def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
21852185
def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
21862186

2187-
def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
2188-
def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
2187+
def int_aarch64_sve_fcvt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
2188+
def int_aarch64_sve_fcvtnt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
21892189

21902190
def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
21912191
def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
846846
return false; // No other 'aarch64.sve.bf*'.
847847
}
848848

849+
// 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
850+
if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
851+
NewFn = nullptr;
852+
return true;
853+
}
854+
849855
if (Name.consume_front("addqv")) {
850856
// 'aarch64.sve.addqv'.
851857
if (!F->getReturnType()->isFPOrFPVectorTy())
@@ -4072,6 +4078,30 @@ static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
40724078
return Rep;
40734079
}
40744080

4081+
static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4082+
Function *F, IRBuilder<> &Builder) {
4083+
Intrinsic::ID NewID =
4084+
StringSwitch<Intrinsic::ID>(Name)
4085+
.Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4086+
.Case("sve.fcvtnt.bf16f32", Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4087+
.Default(Intrinsic::not_intrinsic);
4088+
if (NewID == Intrinsic::not_intrinsic)
4089+
llvm_unreachable("Unhandled Intrinsic!");
4090+
4091+
SmallVector<Value *, 3> Args(CI->args());
4092+
4093+
// The original intrinsics incorrectly used a predicate based on the smallest
4094+
// element type rather than the largest.
4095+
Type *PredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4096+
Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4097+
Args[1]->getType(), Args[1]);
4098+
Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
4099+
PredTy, Args[1]);
4100+
4101+
Function *NewF = Intrinsic::getDeclaration(CI->getModule(), NewID);
4102+
return Builder.CreateCall(NewF, Args, CI->getName());
4103+
}
4104+
40754105
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
40764106
IRBuilder<> &Builder) {
40774107
if (Name == "mve.vctp64.old") {
@@ -4325,6 +4355,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
43254355

43264356
bool IsX86 = Name.consume_front("x86.");
43274357
bool IsNVVM = Name.consume_front("nvvm.");
4358+
bool IsAArch64 = Name.consume_front("aarch64.");
43284359
bool IsARM = Name.consume_front("arm.");
43294360
bool IsAMDGCN = Name.consume_front("amdgcn.");
43304361
bool IsDbg = Name.consume_front("dbg.");
@@ -4336,6 +4367,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
43364367
Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
43374368
} else if (IsX86) {
43384369
Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4370+
} else if (IsAArch64) {
4371+
Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
43394372
} else if (IsARM) {
43404373
Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
43414374
} else if (IsAMDGCN) {

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5553,6 +5553,14 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
55535553
if (InVT == VT)
55545554
return Op;
55555555

5556+
// Look through casts to <n x 16 x i1> when their input has more lanes than
5557+
// VT. This will increase the chances of removing casts that introduce new
5558+
// lanes, which have to be explicitly zero'd.
5559+
if (Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
5560+
Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
5561+
Op.getOperand(1).getValueType().bitsGT(VT))
5562+
Op = Op.getOperand(1);
5563+
55565564
SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
55575565

55585566
// We only have to zero the lanes if new lanes are being defined, e.g. when

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2425,8 +2425,8 @@ let Predicates = [HasBF16, HasSVEorSME] in {
24252425
defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
24262426
defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
24272427
defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
2428-
defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32, AArch64fcvtr_mt>;
2429-
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
2428+
defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
2429+
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
24302430
} // End HasBF16, HasSVEorSME
24312431

24322432
let Predicates = [HasSVEorSME] in {

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2157,7 +2157,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21572157
switch (IID) {
21582158
default:
21592159
break;
2160-
case Intrinsic::aarch64_sve_fcvt_bf16f32:
2160+
case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
21612161
case Intrinsic::aarch64_sve_fcvt_f16f32:
21622162
case Intrinsic::aarch64_sve_fcvt_f16f64:
21632163
case Intrinsic::aarch64_sve_fcvt_f32f16:
@@ -2188,7 +2188,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21882188
case Intrinsic::aarch64_sve_ucvtf_f32i64:
21892189
case Intrinsic::aarch64_sve_ucvtf_f64i32:
21902190
return instCombineSVEAllOrNoActiveUnary(IC, II);
2191-
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
2191+
case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
21922192
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
21932193
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
21942194
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8811,7 +8811,7 @@ multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
88118811
SDPatternOperator ir_op = null_frag> {
88128812
def NAME : sve_bfloat_convert<N, asm>;
88138813

8814-
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i1, nxv4f32, !cast<Instruction>(NAME)>;
8814+
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
88158815
def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
88168816
def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
88178817
}

0 commit comments

Comments
 (0)