Skip to content

Commit cad444e

Browse files
SC llvm team
authored and committed
Merged main:57a0a9aadf1e into amd-gfx:416e90e927a5
Local branch amd-gfx 416e90e Merged main:681d02d09e35 into amd-gfx:5709b9b421d5 Remote branch main 57a0a9a [InstCombine] Add more inbounds tests for indexed compare fold (NFC)
2 parents 416e90e + 57a0a9a commit cad444e

File tree

15 files changed

+912
-476
lines changed

15 files changed

+912
-476
lines changed

clang/lib/Format/TokenAnnotator.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2391,9 +2391,8 @@ class AnnotatingParser {
23912391
return true;
23922392

23932393
// If a (non-string) literal follows, this is likely a cast.
2394-
if (Tok.Next->isNot(tok::string_literal) &&
2395-
(Tok.Next->Tok.isLiteral() ||
2396-
Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) {
2394+
if (Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof) ||
2395+
(Tok.Next->Tok.isLiteral() && Tok.Next->isNot(tok::string_literal))) {
23972396
return true;
23982397
}
23992398

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 481770
19+
#define LLVM_MAIN_REVISION 481778
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/Support/GenericDomTreeConstruction.h

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ struct SemiNCAInfo {
6565
unsigned DFSNum = 0;
6666
unsigned Parent = 0;
6767
unsigned Semi = 0;
68-
NodePtr Label = nullptr;
68+
unsigned Label = 0;
6969
NodePtr IDom = nullptr;
7070
SmallVector<NodePtr, 2> ReverseChildren;
7171
};
@@ -189,8 +189,7 @@ struct SemiNCAInfo {
189189

190190
// Visited nodes always have positive DFS numbers.
191191
if (BBInfo.DFSNum != 0) continue;
192-
BBInfo.DFSNum = BBInfo.Semi = ++LastNum;
193-
BBInfo.Label = BB;
192+
BBInfo.DFSNum = BBInfo.Semi = BBInfo.Label = ++LastNum;
194193
NumToNode.push_back(BB);
195194

196195
constexpr bool Direction = IsReverse != IsPostDom; // XOR.
@@ -237,8 +236,9 @@ struct SemiNCAInfo {
237236
//
238237
// For each vertex V, its Label points to the vertex with the minimal sdom(U)
239238
// (Semi) in its path from V (included) to NodeToInfo[V].Parent (excluded).
240-
NodePtr eval(NodePtr V, unsigned LastLinked,
241-
SmallVectorImpl<InfoRec *> &Stack) {
239+
unsigned eval(NodePtr V, unsigned LastLinked,
240+
SmallVectorImpl<InfoRec *> &Stack,
241+
ArrayRef<InfoRec *> NumToInfo) {
242242
InfoRec *VInfo = &NodeToInfo[V];
243243
if (VInfo->Parent < LastLinked)
244244
return VInfo->Label;
@@ -247,17 +247,17 @@ struct SemiNCAInfo {
247247
assert(Stack.empty());
248248
do {
249249
Stack.push_back(VInfo);
250-
VInfo = &NodeToInfo[NumToNode[VInfo->Parent]];
250+
VInfo = NumToInfo[VInfo->Parent];
251251
} while (VInfo->Parent >= LastLinked);
252252

253253
// Path compression. Point each vertex's Parent to the root and update its
254254
// Label if any of its ancestors (PInfo->Label) has a smaller Semi.
255255
const InfoRec *PInfo = VInfo;
256-
const InfoRec *PLabelInfo = &NodeToInfo[PInfo->Label];
256+
const InfoRec *PLabelInfo = NumToInfo[PInfo->Label];
257257
do {
258258
VInfo = Stack.pop_back_val();
259259
VInfo->Parent = PInfo->Parent;
260-
const InfoRec *VLabelInfo = &NodeToInfo[VInfo->Label];
260+
const InfoRec *VLabelInfo = NumToInfo[VInfo->Label];
261261
if (PLabelInfo->Semi < VLabelInfo->Semi)
262262
VInfo->Label = PInfo->Label;
263263
else
@@ -270,18 +270,20 @@ struct SemiNCAInfo {
270270
// This function requires DFS to be run before calling it.
271271
void runSemiNCA(DomTreeT &DT, const unsigned MinLevel = 0) {
272272
const unsigned NextDFSNum(NumToNode.size());
273+
SmallVector<InfoRec *, 8> NumToInfo = {nullptr};
274+
NumToInfo.reserve(NextDFSNum);
273275
// Initialize IDoms to spanning tree parents.
274276
for (unsigned i = 1; i < NextDFSNum; ++i) {
275277
const NodePtr V = NumToNode[i];
276278
auto &VInfo = NodeToInfo[V];
277279
VInfo.IDom = NumToNode[VInfo.Parent];
280+
NumToInfo.push_back(&VInfo);
278281
}
279282

280283
// Step #1: Calculate the semidominators of all vertices.
281284
SmallVector<InfoRec *, 32> EvalStack;
282285
for (unsigned i = NextDFSNum - 1; i >= 2; --i) {
283-
NodePtr W = NumToNode[i];
284-
auto &WInfo = NodeToInfo[W];
286+
auto &WInfo = *NumToInfo[i];
285287

286288
// Initialize the semi dominator to point to the parent node.
287289
WInfo.Semi = WInfo.Parent;
@@ -294,7 +296,7 @@ struct SemiNCAInfo {
294296
if (TN && TN->getLevel() < MinLevel)
295297
continue;
296298

297-
unsigned SemiU = NodeToInfo[eval(N, i + 1, EvalStack)].Semi;
299+
unsigned SemiU = NumToInfo[eval(N, i + 1, EvalStack, NumToInfo)]->Semi;
298300
if (SemiU < WInfo.Semi) WInfo.Semi = SemiU;
299301
}
300302
}
@@ -304,8 +306,7 @@ struct SemiNCAInfo {
304306
// Note that the parents were stored in IDoms and later got invalidated
305307
// during path compression in Eval.
306308
for (unsigned i = 2; i < NextDFSNum; ++i) {
307-
const NodePtr W = NumToNode[i];
308-
auto &WInfo = NodeToInfo[W];
309+
auto &WInfo = *NumToInfo[i];
309310
const unsigned SDomNum = NodeToInfo[NumToNode[WInfo.Semi]].DFSNum;
310311
NodePtr WIDomCandidate = WInfo.IDom;
311312
while (NodeToInfo[WIDomCandidate].DFSNum > SDomNum)
@@ -325,8 +326,7 @@ struct SemiNCAInfo {
325326
assert(NumToNode.size() == 1 && "SNCAInfo must be freshly constructed");
326327

327328
auto &BBInfo = NodeToInfo[nullptr];
328-
BBInfo.DFSNum = BBInfo.Semi = 1;
329-
BBInfo.Label = nullptr;
329+
BBInfo.DFSNum = BBInfo.Semi = BBInfo.Label = 1;
330330

331331
NumToNode.push_back(nullptr); // NumToNode[1] = nullptr;
332332
}

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2820,6 +2820,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
28202820
Observer.changedInstr(MI);
28212821
return Legalized;
28222822
}
2823+
case TargetOpcode::G_VECREDUCE_FADD:
28232824
case TargetOpcode::G_VECREDUCE_FMIN:
28242825
case TargetOpcode::G_VECREDUCE_FMAX:
28252826
case TargetOpcode::G_VECREDUCE_FMINIMUM:

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -970,11 +970,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
970970
.legalFor(PackedVectorAllTypeList)
971971
.lowerIf(isScalar(0));
972972

973+
// For fadd reductions we have pairwise operations available. We treat the
974+
// usual legal types as legal and handle the lowering to pairwise instructions
975+
// later.
973976
getActionDefinitionsBuilder(G_VECREDUCE_FADD)
974-
// We only have FADDP to do reduction-like operations. Lower the rest.
975-
.legalFor({{s32, v2s32}, {s64, v2s64}})
977+
.legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
978+
.legalIf([=](const LegalityQuery &Query) {
979+
const auto &Ty = Query.Types[1];
980+
return (Ty == v4s16 || Ty == v8s16) && HasFP16;
981+
})
982+
.minScalarOrElt(0, MinFPScalar)
976983
.clampMaxNumElements(1, s64, 2)
977-
.clampMaxNumElements(1, s32, 2)
984+
.clampMaxNumElements(1, s32, 4)
985+
.clampMaxNumElements(1, s16, 8)
978986
.lower();
979987

980988
getActionDefinitionsBuilder(G_VECREDUCE_ADD)

llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp

Lines changed: 100 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,10 @@ MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size,
786786
#define SP_SUB_SUPER(R) SUB_SUPER(SPL, SP, ESP, RSP, R)
787787
#define NO_SUB_SUPER(NO, REG) \
788788
SUB_SUPER(R##NO##B, R##NO##W, R##NO##D, R##NO, REG)
789+
#define NO_SUB_SUPER_B(NO) NO_SUB_SUPER(NO, R##NO##B)
790+
#define NO_SUB_SUPER_W(NO) NO_SUB_SUPER(NO, R##NO##W)
791+
#define NO_SUB_SUPER_D(NO) NO_SUB_SUPER(NO, R##NO##D)
792+
#define NO_SUB_SUPER_Q(NO) NO_SUB_SUPER(NO, R##NO)
789793
switch (Size) {
790794
default:
791795
llvm_unreachable("illegal register size");
@@ -809,14 +813,30 @@ MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size,
809813
DI_SUB_SUPER(DIL)
810814
BP_SUB_SUPER(BPL)
811815
SP_SUB_SUPER(SPL)
812-
NO_SUB_SUPER(8, R8B)
813-
NO_SUB_SUPER(9, R9B)
814-
NO_SUB_SUPER(10, R10B)
815-
NO_SUB_SUPER(11, R11B)
816-
NO_SUB_SUPER(12, R12B)
817-
NO_SUB_SUPER(13, R13B)
818-
NO_SUB_SUPER(14, R14B)
819-
NO_SUB_SUPER(15, R15B)
816+
NO_SUB_SUPER_B(8)
817+
NO_SUB_SUPER_B(9)
818+
NO_SUB_SUPER_B(10)
819+
NO_SUB_SUPER_B(11)
820+
NO_SUB_SUPER_B(12)
821+
NO_SUB_SUPER_B(13)
822+
NO_SUB_SUPER_B(14)
823+
NO_SUB_SUPER_B(15)
824+
NO_SUB_SUPER_B(16)
825+
NO_SUB_SUPER_B(17)
826+
NO_SUB_SUPER_B(18)
827+
NO_SUB_SUPER_B(19)
828+
NO_SUB_SUPER_B(20)
829+
NO_SUB_SUPER_B(21)
830+
NO_SUB_SUPER_B(22)
831+
NO_SUB_SUPER_B(23)
832+
NO_SUB_SUPER_B(24)
833+
NO_SUB_SUPER_B(25)
834+
NO_SUB_SUPER_B(26)
835+
NO_SUB_SUPER_B(27)
836+
NO_SUB_SUPER_B(28)
837+
NO_SUB_SUPER_B(29)
838+
NO_SUB_SUPER_B(30)
839+
NO_SUB_SUPER_B(31)
820840
}
821841
}
822842
case 16:
@@ -830,14 +850,30 @@ MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size,
830850
DI_SUB_SUPER(DI)
831851
BP_SUB_SUPER(BP)
832852
SP_SUB_SUPER(SP)
833-
NO_SUB_SUPER(8, R8W)
834-
NO_SUB_SUPER(9, R9W)
835-
NO_SUB_SUPER(10, R10W)
836-
NO_SUB_SUPER(11, R11W)
837-
NO_SUB_SUPER(12, R12W)
838-
NO_SUB_SUPER(13, R13W)
839-
NO_SUB_SUPER(14, R14W)
840-
NO_SUB_SUPER(15, R15W)
853+
NO_SUB_SUPER_W(8)
854+
NO_SUB_SUPER_W(9)
855+
NO_SUB_SUPER_W(10)
856+
NO_SUB_SUPER_W(11)
857+
NO_SUB_SUPER_W(12)
858+
NO_SUB_SUPER_W(13)
859+
NO_SUB_SUPER_W(14)
860+
NO_SUB_SUPER_W(15)
861+
NO_SUB_SUPER_W(16)
862+
NO_SUB_SUPER_W(17)
863+
NO_SUB_SUPER_W(18)
864+
NO_SUB_SUPER_W(19)
865+
NO_SUB_SUPER_W(20)
866+
NO_SUB_SUPER_W(21)
867+
NO_SUB_SUPER_W(22)
868+
NO_SUB_SUPER_W(23)
869+
NO_SUB_SUPER_W(24)
870+
NO_SUB_SUPER_W(25)
871+
NO_SUB_SUPER_W(26)
872+
NO_SUB_SUPER_W(27)
873+
NO_SUB_SUPER_W(28)
874+
NO_SUB_SUPER_W(29)
875+
NO_SUB_SUPER_W(30)
876+
NO_SUB_SUPER_W(31)
841877
}
842878
case 32:
843879
switch (Reg.id()) {
@@ -850,14 +886,30 @@ MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size,
850886
DI_SUB_SUPER(EDI)
851887
BP_SUB_SUPER(EBP)
852888
SP_SUB_SUPER(ESP)
853-
NO_SUB_SUPER(8, R8D)
854-
NO_SUB_SUPER(9, R9D)
855-
NO_SUB_SUPER(10, R10D)
856-
NO_SUB_SUPER(11, R11D)
857-
NO_SUB_SUPER(12, R12D)
858-
NO_SUB_SUPER(13, R13D)
859-
NO_SUB_SUPER(14, R14D)
860-
NO_SUB_SUPER(15, R15D)
889+
NO_SUB_SUPER_D(8)
890+
NO_SUB_SUPER_D(9)
891+
NO_SUB_SUPER_D(10)
892+
NO_SUB_SUPER_D(11)
893+
NO_SUB_SUPER_D(12)
894+
NO_SUB_SUPER_D(13)
895+
NO_SUB_SUPER_D(14)
896+
NO_SUB_SUPER_D(15)
897+
NO_SUB_SUPER_D(16)
898+
NO_SUB_SUPER_D(17)
899+
NO_SUB_SUPER_D(18)
900+
NO_SUB_SUPER_D(19)
901+
NO_SUB_SUPER_D(20)
902+
NO_SUB_SUPER_D(21)
903+
NO_SUB_SUPER_D(22)
904+
NO_SUB_SUPER_D(23)
905+
NO_SUB_SUPER_D(24)
906+
NO_SUB_SUPER_D(25)
907+
NO_SUB_SUPER_D(26)
908+
NO_SUB_SUPER_D(27)
909+
NO_SUB_SUPER_D(28)
910+
NO_SUB_SUPER_D(29)
911+
NO_SUB_SUPER_D(30)
912+
NO_SUB_SUPER_D(31)
861913
}
862914
case 64:
863915
switch (Reg.id()) {
@@ -870,14 +922,30 @@ MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size,
870922
DI_SUB_SUPER(RDI)
871923
BP_SUB_SUPER(RBP)
872924
SP_SUB_SUPER(RSP)
873-
NO_SUB_SUPER(8, R8)
874-
NO_SUB_SUPER(9, R9)
875-
NO_SUB_SUPER(10, R10)
876-
NO_SUB_SUPER(11, R11)
877-
NO_SUB_SUPER(12, R12)
878-
NO_SUB_SUPER(13, R13)
879-
NO_SUB_SUPER(14, R14)
880-
NO_SUB_SUPER(15, R15)
925+
NO_SUB_SUPER_Q(8)
926+
NO_SUB_SUPER_Q(9)
927+
NO_SUB_SUPER_Q(10)
928+
NO_SUB_SUPER_Q(11)
929+
NO_SUB_SUPER_Q(12)
930+
NO_SUB_SUPER_Q(13)
931+
NO_SUB_SUPER_Q(14)
932+
NO_SUB_SUPER_Q(15)
933+
NO_SUB_SUPER_Q(16)
934+
NO_SUB_SUPER_Q(17)
935+
NO_SUB_SUPER_Q(18)
936+
NO_SUB_SUPER_Q(19)
937+
NO_SUB_SUPER_Q(20)
938+
NO_SUB_SUPER_Q(21)
939+
NO_SUB_SUPER_Q(22)
940+
NO_SUB_SUPER_Q(23)
941+
NO_SUB_SUPER_Q(24)
942+
NO_SUB_SUPER_Q(25)
943+
NO_SUB_SUPER_Q(26)
944+
NO_SUB_SUPER_Q(27)
945+
NO_SUB_SUPER_Q(28)
946+
NO_SUB_SUPER_Q(29)
947+
NO_SUB_SUPER_Q(30)
948+
NO_SUB_SUPER_Q(31)
881949
}
882950
}
883951
}

llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ static cl::opt<bool> CanonicalizeICmpPredicatesToUnsigned(
5555
STATISTIC(NumPhis, "Number of phis propagated");
5656
STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
5757
STATISTIC(NumSelects, "Number of selects propagated");
58-
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
5958
STATISTIC(NumCmps, "Number of comparisons propagated");
6059
STATISTIC(NumReturns, "Number of return values propagated");
6160
STATISTIC(NumDeadCases, "Number of switch cases removed");
@@ -254,23 +253,6 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
254253
return Changed;
255254
}
256255

257-
static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
258-
Value *Pointer = nullptr;
259-
if (LoadInst *L = dyn_cast<LoadInst>(I))
260-
Pointer = L->getPointerOperand();
261-
else
262-
Pointer = cast<StoreInst>(I)->getPointerOperand();
263-
264-
if (isa<Constant>(Pointer)) return false;
265-
266-
Constant *C = LVI->getConstant(Pointer, I);
267-
if (!C) return false;
268-
269-
++NumMemAccess;
270-
I->replaceUsesOfWith(Pointer, C);
271-
return true;
272-
}
273-
274256
static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
275257
if (!CanonicalizeICmpPredicatesToUnsigned)
276258
return false;
@@ -1149,10 +1131,6 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
11491131
case Instruction::FCmp:
11501132
BBChanged |= processCmp(cast<CmpInst>(&II), LVI);
11511133
break;
1152-
case Instruction::Load:
1153-
case Instruction::Store:
1154-
BBChanged |= processMemAccess(&II, LVI);
1155-
break;
11561134
case Instruction::Call:
11571135
case Instruction::Invoke:
11581136
BBChanged |= processCallSite(cast<CallBase>(II), LVI);

llvm/lib/Transforms/Scalar/JumpThreading.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,6 @@ static cl::opt<unsigned> PhiDuplicateThreshold(
102102
cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
103103
cl::Hidden);
104104

105-
static cl::opt<bool> PrintLVIAfterJumpThreading(
106-
"print-lvi-after-jump-threading",
107-
cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
108-
cl::Hidden);
109-
110105
static cl::opt<bool> ThreadAcrossLoopHeaders(
111106
"jump-threading-across-loop-headers",
112107
cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
@@ -257,11 +252,6 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
257252
&DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
258253
std::nullopt, std::nullopt);
259254

260-
if (PrintLVIAfterJumpThreading) {
261-
dbgs() << "LVI for function '" << F.getName() << "':\n";
262-
LVI.printLVI(F, getDomTreeUpdater()->getDomTree(), dbgs());
263-
}
264-
265255
if (!Changed)
266256
return PreservedAnalyses::all();
267257

llvm/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
; RUN: opt < %s -passes=jump-threading -print-lvi-after-jump-threading -disable-output 2>&1 | FileCheck %s
2-
; RUN: opt < %s -passes=jump-threading -print-lvi-after-jump-threading -disable-output 2>&1 | FileCheck %s
1+
; RUN: opt < %s -passes="jump-threading,print<lazy-value-info>" -disable-output 2>&1 | FileCheck %s
32

43
; Testing LVI cache after jump-threading
54

0 commit comments

Comments
 (0)