Skip to content

Commit 91a2056

Browse files
authored
[AArch64][SVE] Instcombine ptrue(all) to splat(i1) (#135016)
SVE operations such as predicated loads are canonicalized to LLVM masked loads; doing the same for ptrue(all), by rewriting it to splat (i1 true), creates further optimization opportunities for generic LLVM IR passes.
1 parent e555cca commit 91a2056

11 files changed

+338
-531
lines changed

clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,17 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0,
5252
typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
5353
// CHECK128-LABEL: define{{.*}} <16 x i8> @f2(<16 x i8> noundef %x)
5454
// CHECK128-NEXT: entry:
55-
// CHECK128-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
5655
// CHECK128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[X:%.*]], i64 0)
57-
// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
56+
// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
5857
// CHECK128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[TMP1]], i64 0)
5958
// CHECK128-NEXT: ret <16 x i8> [[CASTFIXEDSVE]]
6059

6160
// CHECK-LABEL: define{{.*}} void @f2(
6261
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<[[#div(VBITS,8)]] x i8>) align 16 captures(none) initializes((0, [[#div(VBITS,8)]])) %agg.result, ptr noundef readonly captures(none) %0)
6362
// CHECK-NEXT: entry:
6463
// CHECK-NEXT: [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, ptr [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
65-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
6664
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8(<vscale x 16 x i8> poison, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
67-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
65+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
6866
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <[[#div(VBITS,8)]] x i8> @llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 0)
6967
// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
7068
// CHECK-NEXT: ret void

clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,12 @@
77

88
// CHECK-LABEL: @test_svrdffr(
99
// CHECK-NEXT: entry:
10-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
11-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
10+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
1211
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
1312
//
1413
// CPP-CHECK-LABEL: @_Z12test_svrdffrv(
1514
// CPP-CHECK-NEXT: entry:
16-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
17-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
15+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
1816
// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
1917
//
2018
svbool_t test_svrdffr()

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,9 +1492,8 @@ static bool isAllActivePredicate(Value *Pred) {
14921492
if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
14931493
cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
14941494
Pred = UncastedPred;
1495-
1496-
return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
1497-
m_ConstantInt<AArch64SVEPredPattern::all>()));
1495+
auto *C = dyn_cast<Constant>(Pred);
1496+
return (C && C->isAllOnesValue());
14981497
}
14991498

15001499
// Use SVE intrinsic info to eliminate redundant operands and/or canonicalise
@@ -1701,14 +1700,7 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
17011700
IntrinsicInst &II) {
17021701
LLVMContext &Ctx = II.getContext();
17031702

1704-
// Check that the predicate is all active
1705-
auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
1706-
if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
1707-
return std::nullopt;
1708-
1709-
const auto PTruePattern =
1710-
cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
1711-
if (PTruePattern != AArch64SVEPredPattern::all)
1703+
if (!isAllActivePredicate(II.getArgOperand(0)))
17121704
return std::nullopt;
17131705

17141706
// Check that we have a compare of zero..
@@ -2118,8 +2110,7 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
21182110
auto *OpPredicate = II.getOperand(0);
21192111
auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
21202112
if (BinOpCode == Instruction::BinaryOpsEnd ||
2121-
!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
2122-
m_ConstantInt<AArch64SVEPredPattern::all>())))
2113+
!isAllActivePredicate(OpPredicate))
21232114
return std::nullopt;
21242115
auto BinOp = IC.Builder.CreateBinOpFMF(
21252116
BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
@@ -2641,6 +2632,13 @@ static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
26412632
return std::nullopt;
26422633
}
26432634

2635+
static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
2636+
IntrinsicInst &II) {
2637+
if (match(II.getOperand(0), m_ConstantInt<AArch64SVEPredPattern::all>()))
2638+
return IC.replaceInstUsesWith(II, Constant::getAllOnesValue(II.getType()));
2639+
return std::nullopt;
2640+
}
2641+
26442642
std::optional<Instruction *>
26452643
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
26462644
IntrinsicInst &II) const {
@@ -2744,6 +2742,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
27442742
return instCombineSVEDupqLane(IC, II);
27452743
case Intrinsic::aarch64_sve_insr:
27462744
return instCombineSVEInsr(IC, II);
2745+
case Intrinsic::aarch64_sve_ptrue:
2746+
return instCombinePTrue(IC, II);
27472747
}
27482748

27492749
return std::nullopt;

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vsca
4242

4343
define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
4444
; CHECK-LABEL: @srshl_abs_all_active_pred(
45-
; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
46-
; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
45+
; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A:%.*]])
4746
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2))
4847
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
4948
;

0 commit comments

Comments
 (0)