
Commit a9c8acf

SC llvm team authored and committed
Merged main:4e0c6d30576a into amd-gfx:cda237852fbe
Local branch amd-gfx cda2378: Merged main:28ae42e66251 into amd-gfx:b6fe60a67eed
Remote branch main 4e0c6d3: Fix build warning caused by mixed signed/unsigned compare (llvm#69797)
2 parents cda2378 + 4e0c6d3 commit a9c8acf

File tree

15 files changed: +480 additions, -59 deletions


llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 478214
+#define LLVM_MAIN_REVISION 478222
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp

Lines changed: 21 additions & 10 deletions
@@ -164,28 +164,39 @@ struct RISCVIncomingValueHandler : public CallLowering::IncomingValueHandler {
 
   void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign VA) override {
-    // Copy argument received in physical register to desired VReg.
-    MIRBuilder.getMBB().addLiveIn(PhysReg);
-    MIRBuilder.buildCopy(ValVReg, PhysReg);
+    markPhysRegUsed(PhysReg);
+    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
   }
 
+  /// How the physical register gets marked varies between formal
+  /// parameters (it's a basic-block live-in), and a call instruction
+  /// (it's an implicit-def of the BL).
+  virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
+
 private:
   const RISCVSubtarget &Subtarget;
 };
 
+struct RISCVFormalArgHandler : public RISCVIncomingValueHandler {
+  RISCVFormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
+      : RISCVIncomingValueHandler(B, MRI) {}
+
+  void markPhysRegUsed(MCRegister PhysReg) override {
+    MIRBuilder.getMRI()->addLiveIn(PhysReg);
+    MIRBuilder.getMBB().addLiveIn(PhysReg);
+  }
+};
+
 struct RISCVCallReturnHandler : public RISCVIncomingValueHandler {
   RISCVCallReturnHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          MachineInstrBuilder &MIB)
       : RISCVIncomingValueHandler(B, MRI), MIB(MIB) {}
 
-  MachineInstrBuilder MIB;
-
-  void assignValueToReg(Register ValVReg, Register PhysReg,
-                        CCValAssign VA) override {
-    // Copy argument received in physical register to desired VReg.
+  void markPhysRegUsed(MCRegister PhysReg) override {
     MIB.addDef(PhysReg, RegState::Implicit);
-    MIRBuilder.buildCopy(ValVReg, PhysReg);
   }
+
+  MachineInstrBuilder MIB;
 };
 
 } // namespace
@@ -312,7 +323,7 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
   RISCVIncomingValueAssigner Assigner(
       CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
       /*IsRet=*/false);
-  RISCVIncomingValueHandler Handler(MIRBuilder, MF.getRegInfo());
+  RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo());
 
   return determineAndHandleAssignments(Handler, Assigner, SplitArgInfos,
                                        MIRBuilder, CC, F.isVarArg());

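The RISCVCallLowering.cpp change above replaces a single concrete handler with a small hierarchy: the shared assignValueToReg logic now defers to a markPhysRegUsed hook, and each subclass records the physical-register use in its own way (a block live-in for formal arguments, an implicit-def on the call instruction for return values). A minimal standalone C++ sketch of that hook pattern, using hypothetical names rather than the real LLVM types:

#include <iostream>
#include <string>

// Base handler: shared assignment logic calls a pure-virtual hook, mirroring
// RISCVIncomingValueHandler::assignValueToReg in the diff above.
struct IncomingHandler {
  virtual ~IncomingHandler() = default;
  void assignValueToReg(const std::string &PhysReg) {
    markPhysRegUsed(PhysReg); // subclass decides how the use is recorded
    std::cout << "copy " << PhysReg << " -> vreg\n";
  }
  virtual void markPhysRegUsed(const std::string &PhysReg) = 0;
};

// Formal arguments: the register is live into the function's entry block.
struct FormalArgHandler : IncomingHandler {
  void markPhysRegUsed(const std::string &PhysReg) override {
    std::cout << PhysReg << " recorded as basic-block live-in\n";
  }
};

// Call returns: the register is an implicit def of the call instruction.
struct CallReturnHandler : IncomingHandler {
  void markPhysRegUsed(const std::string &PhysReg) override {
    std::cout << PhysReg << " recorded as implicit-def of the call\n";
  }
};

int main() {
  FormalArgHandler Formal;
  CallReturnHandler Ret;
  Formal.assignValueToReg("x10"); // incoming argument register
  Ret.assignValueToReg("x10");    // value returned by a call
}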
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp

Lines changed: 4 additions & 4 deletions
@@ -659,12 +659,12 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
   Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
   // Get the symbolic expression for this instruction.
   const SCEV *IV = SE->getSCEV(IVOperand);
-  unsigned MaskBits;
+  int MaskBits;
   if (UseInst->getOpcode() == CastInst::SIToFP)
-    MaskBits = SE->getSignedRange(IV).getMinSignedBits();
+    MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
   else
-    MaskBits = SE->getUnsignedRange(IV).getActiveBits();
-  unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
+    MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
+  int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
   if (MaskBits <= DestNumSigBits) {
     for (User *U : UseInst->users()) {
       // Match for fptosi/fptoui of sitofp and with same type.

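Per the commit message, this hunk fixes a mixed signed/unsigned comparison (llvm#69797): Type::getFPMantissaWidth() returns a signed int (it can be -1 when the mantissa width is unknown, e.g. for ppc_fp128), while the range queries return unsigned bit counts, so the patch performs the comparison in a signed type. A standalone illustration of the pitfall, plain C++ rather than LLVM code:

#include <iostream>

int main() {
  unsigned MaskBits = 10;  // bits the induction variable actually needs
  int DestNumSigBits = -1; // an "unknown width" result, as getFPMantissaWidth() can return
  // In a mixed compare the int is converted to unsigned, so -1 becomes
  // 4294967295 and the test is unexpectedly true; compilers diagnose the
  // direct form with -Wsign-compare, which is the warning being fixed.
  std::cout << (MaskBits <= (unsigned)DestNumSigBits) << '\n'; // prints 1
  // Comparing in a signed type, as the patch does, preserves the intent.
  std::cout << ((int)MaskBits <= DestNumSigBits) << '\n';      // prints 0
}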
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 25 additions & 8 deletions
@@ -7463,21 +7463,30 @@ VPValue *VPBuilder::createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
       new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
 }
 
+// This function will select a scalable VF if the target supports scalable
+// vectors and a fixed one otherwise.
 // TODO: we could return a pair of values that specify the max VF and
 // min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of
 // `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment
 // doesn't have a cost model that can choose which plan to execute if
 // more than one is generated.
-static unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
-                                 LoopVectorizationCostModel &CM) {
+static ElementCount determineVPlanVF(const TargetTransformInfo &TTI,
+                                     LoopVectorizationCostModel &CM) {
   unsigned WidestType;
   std::tie(std::ignore, WidestType) = CM.getSmallestAndWidestTypes();
-  return WidestVectorRegBits / WidestType;
+
+  TargetTransformInfo::RegisterKind RegKind =
+      TTI.enableScalableVectorization()
+          ? TargetTransformInfo::RGK_ScalableVector
+          : TargetTransformInfo::RGK_FixedWidthVector;
+
+  TypeSize RegSize = TTI.getRegisterBitWidth(RegKind);
+  unsigned N = RegSize.getKnownMinValue() / WidestType;
+  return ElementCount::get(N, RegSize.isScalable());
 }
 
 VectorizationFactor
 LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
-  assert(!UserVF.isScalable() && "scalable vectors not yet supported");
   ElementCount VF = UserVF;
   // Outer loop handling: They may require CFG and instruction level
   // transformations before even evaluating whether vectorization is profitable.
@@ -7487,10 +7496,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
   // If the user doesn't provide a vectorization factor, determine a
   // reasonable one.
   if (UserVF.isZero()) {
-    VF = ElementCount::getFixed(determineVPlanVF(
-        TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
-            .getFixedValue(),
-        CM));
+    VF = determineVPlanVF(TTI, CM);
     LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
 
     // Make sure we have a VF > 1 for stress testing.
@@ -7499,6 +7505,17 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
               << "overriding computed VF.\n");
       VF = ElementCount::getFixed(4);
     }
+  } else if (UserVF.isScalable() && !TTI.supportsScalableVectors() &&
+             !ForceTargetSupportsScalableVectors) {
+    LLVM_DEBUG(dbgs() << "LV: Not vectorizing. Scalable VF requested, but "
+                      << "not supported by the target.\n");
+    reportVectorizationFailure(
+        "Scalable vectorization requested but not supported by the target",
+        "the scalable user-specified vectorization width for outer-loop "
+        "vectorization cannot be used because the target does not support "
+        "scalable vectors.",
+        "ScalableVFUnfeasible", ORE, OrigLoop);
+    return VectorizationFactor::Disabled();
   }
   assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
   assert(isPowerOf2_32(VF.getKnownMinValue()) &&
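The key type change above is determineVPlanVF returning an ElementCount instead of a plain unsigned, which lets the VPlan-native path express scalable factors such as vscale x 4. A rough standalone sketch, assuming a simplified stand-in for llvm::ElementCount (not the real class), of what the (known-minimum, scalable) pair encodes:

#include <iostream>

// Assumed simplification of llvm::ElementCount: a vectorization factor is a
// known-minimum lane count plus a flag for whether it scales with vscale.
struct SketchElementCount {
  unsigned MinVal; // known-minimum number of lanes
  bool Scalable;   // true => actual lanes = MinVal * vscale (hardware-chosen)
  static SketchElementCount get(unsigned MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
};

int main() {
  // Mirrors the new determineVPlanVF on an SVE-like target: the scalable
  // register file has a known-minimum width of 128 bits and the widest loop
  // type is a 32-bit float, giving the "vscale x 4" VF seen in the test below.
  unsigned RegSizeKnownMin = 128, WidestTypeBits = 32;
  auto VF = SketchElementCount::get(RegSizeKnownMin / WidestTypeBits, /*Scalable=*/true);
  std::cout << (VF.Scalable ? "vscale x " : "") << VF.MinVal << '\n'; // vscale x 4
}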
New test file

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple aarch64 -mattr=+sve -passes=loop-vectorize -enable-vplan-native-path < %s | FileCheck %s
+
+@A = external local_unnamed_addr global [1024 x float], align 4
+@B = external local_unnamed_addr global [512 x float], align 4
+
+; Test if the vplan-native-path successfully vectorizes a loop using scalable vectors if the target prefers scalable vectors.
+define void @foo() {
+; CHECK-LABEL: define void @foo
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[OUTER_LOOP_LATCH4:%.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[OUTER_LOOP_LATCH4]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-NEXT: br label [[INNER_LOOP1:%.*]]
+; CHECK: inner_loop1:
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP13:%.*]], [[INNER_LOOP1]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x float> [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP12:%.*]], [[INNER_LOOP1]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, <vscale x 4 x i64> [[VEC_PHI]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-NEXT: [[TMP12]] = fmul <vscale x 4 x float> [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]]
+; CHECK-NEXT: [[TMP13]] = add nuw nsw <vscale x 4 x i64> [[VEC_PHI]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 4 x i64> [[TMP13]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 512, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0
+; CHECK-NEXT: br i1 [[TMP15]], label [[OUTER_LOOP_LATCH4]], label [[INNER_LOOP1]]
+; CHECK: outer_loop_latch4:
+; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x float> [ [[TMP12]], [[INNER_LOOP1]] ]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI5]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <vscale x 4 x i64> [[TMP16]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1024, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
+; CHECK: outer_loop:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[OUTER_LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, i64 [[I]]
+; CHECK-NEXT: [[X_START:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
+; CHECK: inner_loop:
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, [[OUTER_LOOP]] ], [ [[J_NEXT:%.*]], [[INNER_LOOP]] ]
+; CHECK-NEXT: [[X:%.*]] = phi float [ [[X_START]], [[OUTER_LOOP]] ], [ [[X_NEXT:%.*]], [[INNER_LOOP]] ]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, i64 [[J]]
+; CHECK-NEXT: [[B:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[X_NEXT]] = fmul float [[X]], [[B]]
+; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT: [[INNER_EXITCOND:%.*]] = icmp eq i64 [[J_NEXT]], 512
+; CHECK-NEXT: br i1 [[INNER_EXITCOND]], label [[OUTER_LOOP_LATCH]], label [[INNER_LOOP]]
+; CHECK: outer_loop_latch:
+; CHECK-NEXT: [[X_NEXT_LCSSA:%.*]] = phi float [ [[X_NEXT]], [[INNER_LOOP]] ]
+; CHECK-NEXT: store float [[X_NEXT_LCSSA]], ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[OUTER_EXITCOND:%.*]] = icmp eq i64 [[I_NEXT]], 1024
+; CHECK-NEXT: br i1 [[OUTER_EXITCOND]], label [[EXIT]], label [[OUTER_LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %outer_loop
+
+outer_loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %outer_loop_latch ]
+  %arrayidx1 = getelementptr inbounds [1024 x float], ptr @A, i64 0, i64 %i
+  %x.start = load float, ptr %arrayidx1, align 4
+  br label %inner_loop
+
+inner_loop:
+  %j = phi i64 [ 0, %outer_loop ], [ %j.next, %inner_loop ]
+  %x = phi float [ %x.start, %outer_loop ], [ %x.next, %inner_loop ]
+  %arrayidx2 = getelementptr inbounds [512 x float], ptr @B, i64 0, i64 %j
+  %b = load float, ptr %arrayidx2, align 4
+  %x.next = fmul float %x, %b
+  %j.next = add nuw nsw i64 %j, 1
+  %inner_exitcond = icmp eq i64 %j.next, 512
+  br i1 %inner_exitcond, label %outer_loop_latch, label %inner_loop
+
+outer_loop_latch:
+  store float %x.next, ptr %arrayidx1, align 4
+  %i.next = add nuw nsw i64 %i, 1
+  %outer_exitcond = icmp eq i64 %i.next, 1024
+  br i1 %outer_exitcond, label %exit, label %outer_loop, !llvm.loop !1
+
+exit:
+  ret void
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
