Skip to content

Commit 99c5a66

Browse files
committed
Revert "[SeparateConstOffsetFromGEP] Reorder trivial GEP chains to separate constants (#73056)" and follow ups
"ninja check-llvm" is failing on tip of tree. This reverts commit ec0aa16. This reverts commit 1b65742.
1 parent fa77e1f commit 99c5a66

File tree

8 files changed

+157
-679
lines changed

8 files changed

+157
-679
lines changed

llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Lines changed: 3 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -391,11 +391,6 @@ class SeparateConstOffsetFromGEP {
391391
/// and returns true if the splitting succeeds.
392392
bool splitGEP(GetElementPtrInst *GEP);
393393

394-
/// Tries to reorder the given GEP with the GEP that produces the base if
395-
/// doing so results in producing a constant offset as the outermost
396-
/// index.
397-
bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);
398-
399394
/// Lower a GEP with multiple indices into multiple GEPs with a single index.
400395
/// Function splitGEP already split the original GEP into a variadic part and
401396
/// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
@@ -969,66 +964,6 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
969964
Variadic->eraseFromParent();
970965
}
971966

972-
bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
973-
TargetTransformInfo &TTI) {
974-
Type *GEPType = GEP->getResultElementType();
975-
// TODO: support reordering for non-trivial GEP chains
976-
if (GEPType->isAggregateType() || GEP->getNumIndices() != 1)
977-
return false;
978-
979-
auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
980-
if (!PtrGEP)
981-
return false;
982-
Type *PtrGEPType = PtrGEP->getResultElementType();
983-
// TODO: support reordering for non-trivial GEP chains
984-
if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1)
985-
return false;
986-
987-
// TODO: support reordering for non-trivial GEP chains
988-
if (PtrGEPType != GEPType ||
989-
PtrGEP->getSourceElementType() != GEP->getSourceElementType())
990-
return false;
991-
992-
bool NestedNeedsExtraction;
993-
int64_t NestedByteOffset =
994-
accumulateByteOffset(PtrGEP, NestedNeedsExtraction);
995-
if (!NestedNeedsExtraction)
996-
return false;
997-
998-
unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
999-
if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
1000-
/*BaseGV=*/nullptr, NestedByteOffset,
1001-
/*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
1002-
return false;
1003-
1004-
IRBuilder<> Builder(GEP);
1005-
Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
1006-
bool GEPInBounds = GEP->isInBounds();
1007-
bool PtrGEPInBounds = PtrGEP->isInBounds();
1008-
bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
1009-
if (IsChainInBounds) {
1010-
auto GEPIdx = GEP->indices().begin();
1011-
auto KnownGEPIdx = computeKnownBits(GEPIdx->get(), *DL);
1012-
IsChainInBounds &= KnownGEPIdx.isNonNegative();
1013-
if (IsChainInBounds) {
1014-
auto PtrGEPIdx = GEP->indices().begin();
1015-
auto KnownPtrGEPIdx = computeKnownBits(PtrGEPIdx->get(), *DL);
1016-
IsChainInBounds &= KnownPtrGEPIdx.isNonNegative();
1017-
}
1018-
}
1019-
1020-
// For trivial GEP chains, we can swap the indicies.
1021-
auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(),
1022-
SmallVector<Value *, 4>(GEP->indices()));
1023-
cast<GetElementPtrInst>(NewSrc)->setIsInBounds(IsChainInBounds);
1024-
auto NewGEP = Builder.CreateGEP(GEPType, NewSrc,
1025-
SmallVector<Value *, 4>(PtrGEP->indices()));
1026-
cast<GetElementPtrInst>(NewGEP)->setIsInBounds(IsChainInBounds);
1027-
GEP->replaceAllUsesWith(NewGEP);
1028-
RecursivelyDeleteTriviallyDeadInstructions(GEP);
1029-
return true;
1030-
}
1031-
1032967
bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
1033968
// Skip vector GEPs.
1034969
if (GEP->getType()->isVectorTy())
@@ -1044,12 +979,10 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
1044979
bool NeedsExtraction;
1045980
int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
1046981

1047-
TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
1048-
1049-
if (!NeedsExtraction) {
1050-
Changed |= reorderGEP(GEP, TTI);
982+
if (!NeedsExtraction)
1051983
return Changed;
1052-
}
984+
985+
TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
1053986

1054987
// If LowerGEP is disabled, before really splitting the GEP, check whether the
1055988
// backend supports the addressing mode we are about to produce. If no, this

llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -273,11 +273,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
273273
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
274274
; CHECK-NEXT: ds_write_b32 v0, v58
275275
; CHECK-NEXT: s_branch .LBB0_7
276-
; CHECK-NEXT: .LBB0_16: ; %Flow45
276+
; CHECK-NEXT: .LBB0_16: ; %Flow43
277277
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
278278
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53
279279
; CHECK-NEXT: v_mov_b32_e32 v57, v0
280-
; CHECK-NEXT: .LBB0_17: ; %Flow46
280+
; CHECK-NEXT: .LBB0_17: ; %Flow44
281281
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
282282
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
283283
; CHECK-NEXT: s_mov_b32 s49, exec_lo
@@ -323,11 +323,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
323323
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
324324
; CHECK-NEXT: ds_write_b32 v0, v57
325325
; CHECK-NEXT: s_branch .LBB0_19
326-
; CHECK-NEXT: .LBB0_22: ; %Flow43
326+
; CHECK-NEXT: .LBB0_22: ; %Flow41
327327
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
328328
; CHECK-NEXT: s_inst_prefetch 0x2
329329
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
330-
; CHECK-NEXT: .LBB0_23: ; %Flow44
330+
; CHECK-NEXT: .LBB0_23: ; %Flow42
331331
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
332332
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s49
333333
; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1
@@ -340,7 +340,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
340340
; CHECK-NEXT: s_or_b32 s43, s4, s43
341341
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s43
342342
; CHECK-NEXT: s_cbranch_execnz .LBB0_5
343-
; CHECK-NEXT: .LBB0_25: ; %Flow51
343+
; CHECK-NEXT: .LBB0_25: ; %Flow49
344344
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s42
345345
; CHECK-NEXT: v_mov_b32_e32 v31, v40
346346
; CHECK-NEXT: v_mov_b32_e32 v0, 1
@@ -362,10 +362,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
362362
; CHECK-NEXT: v_cmpx_gt_u32_e64 v47, v41
363363
; CHECK-NEXT: s_cbranch_execz .LBB0_33
364364
; CHECK-NEXT: ; %bb.26:
365-
; CHECK-NEXT: s_mov_b32 s42, 0
365+
; CHECK-NEXT: s_add_u32 s42, s44, 8
366+
; CHECK-NEXT: s_addc_u32 s43, s45, 0
367+
; CHECK-NEXT: s_mov_b32 s44, 0
366368
; CHECK-NEXT: s_branch .LBB0_28
367369
; CHECK-NEXT: .LBB0_27: ; in Loop: Header=BB0_28 Depth=1
368-
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s43
370+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s45
369371
; CHECK-NEXT: v_mov_b32_e32 v31, v40
370372
; CHECK-NEXT: v_mov_b32_e32 v0, 0
371373
; CHECK-NEXT: s_add_u32 s8, s34, 40
@@ -381,12 +383,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
381383
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
382384
; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41
383385
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41
384-
; CHECK-NEXT: s_or_b32 s42, vcc_lo, s42
385-
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s42
386+
; CHECK-NEXT: s_or_b32 s44, vcc_lo, s44
387+
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
386388
; CHECK-NEXT: s_cbranch_execz .LBB0_33
387389
; CHECK-NEXT: .LBB0_28: ; =>This Inner Loop Header: Depth=1
388390
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v41
389-
; CHECK-NEXT: s_mov_b32 s43, exec_lo
391+
; CHECK-NEXT: s_mov_b32 s45, exec_lo
390392
; CHECK-NEXT: ds_read_b32 v0, v0
391393
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
392394
; CHECK-NEXT: v_lshrrev_b32_e32 v63, 10, v0
@@ -395,15 +397,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
395397
; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63
396398
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62
397399
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72
398-
; CHECK-NEXT: v_add_co_u32 v2, s4, s44, v1
399-
; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s45, 0, s4
400+
; CHECK-NEXT: v_add_co_u32 v2, s4, s42, v1
401+
; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s43, 0, s4
400402
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
401403
; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
402404
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
403405
; CHECK-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
404406
; CHECK-NEXT: s_clause 0x1
405-
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:8
406-
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:8
407+
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
408+
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
407409
; CHECK-NEXT: s_waitcnt vmcnt(0)
408410
; CHECK-NEXT: v_xor_b32_e32 v46, v9, v5
409411
; CHECK-NEXT: v_xor_b32_e32 v45, v8, v4
@@ -415,8 +417,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
415417
; CHECK-NEXT: s_cbranch_execz .LBB0_27
416418
; CHECK-NEXT: ; %bb.29: ; in Loop: Header=BB0_28 Depth=1
417419
; CHECK-NEXT: s_clause 0x1
418-
; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:24
419-
; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:24
420+
; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:16
421+
; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:16
420422
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v45
421423
; CHECK-NEXT: v_alignbit_b32 v1, v46, v45, 12
422424
; CHECK-NEXT: v_and_b32_e32 v2, 0xf0000, v45

0 commit comments

Comments
 (0)