Skip to content

Commit 16592a3

Browse files
committed
Revert "[InstCombine] Remove PromoteCastOfAllocation() fold (NFC)"
1 parent 82182a1 commit 16592a3

File tree

3 files changed

+250
-1
lines changed

3 files changed

+250
-1
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines changed: 172 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,168 @@ using namespace PatternMatch;
2525

2626
#define DEBUG_TYPE "instcombine"
2727

28+
#ifndef INTEL_SYCL_OPAQUEPOINTER_READY
29+
/// Analyze 'Val', seeing if it is a simple linear expression.
30+
/// If so, decompose it, returning some value X, such that Val is
31+
/// X*Scale+Offset.
32+
///
33+
static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
34+
uint64_t &Offset) {
35+
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
36+
Offset = CI->getZExtValue();
37+
Scale = 0;
38+
return ConstantInt::get(Val->getType(), 0);
39+
}
40+
41+
if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
42+
// Cannot look past anything that might overflow.
43+
// We specifically require nuw because we store the Scale in an unsigned
44+
// and perform an unsigned divide on it.
45+
OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
46+
if (OBI && !OBI->hasNoUnsignedWrap()) {
47+
Scale = 1;
48+
Offset = 0;
49+
return Val;
50+
}
51+
52+
if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
53+
if (I->getOpcode() == Instruction::Shl) {
54+
// This is a value scaled by '1 << the shift amt'.
55+
Scale = UINT64_C(1) << RHS->getZExtValue();
56+
Offset = 0;
57+
return I->getOperand(0);
58+
}
59+
60+
if (I->getOpcode() == Instruction::Mul) {
61+
// This value is scaled by 'RHS'.
62+
Scale = RHS->getZExtValue();
63+
Offset = 0;
64+
return I->getOperand(0);
65+
}
66+
67+
if (I->getOpcode() == Instruction::Add) {
68+
// We have X+C. Check to see if we really have (X*C2)+C1,
69+
// where C1 is divisible by C2.
70+
unsigned SubScale;
71+
Value *SubVal =
72+
decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
73+
Offset += RHS->getZExtValue();
74+
Scale = SubScale;
75+
return SubVal;
76+
}
77+
}
78+
}
79+
80+
// Otherwise, we can't look past this.
81+
Scale = 1;
82+
Offset = 0;
83+
return Val;
84+
}
85+
86+
/// If we find a cast of an allocation instruction, try to eliminate the cast by
87+
/// moving the type information into the alloc.
88+
Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
89+
AllocaInst &AI) {
90+
PointerType *PTy = cast<PointerType>(CI.getType());
91+
// Opaque pointers don't have an element type we could replace with.
92+
if (PTy->isOpaque())
93+
return nullptr;
94+
95+
IRBuilderBase::InsertPointGuard Guard(Builder);
96+
Builder.SetInsertPoint(&AI);
97+
98+
// Get the type really allocated and the type casted to.
99+
Type *AllocElTy = AI.getAllocatedType();
100+
Type *CastElTy = PTy->getNonOpaquePointerElementType();
101+
if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
102+
103+
// This optimisation does not work for cases where the cast type
104+
// is scalable and the allocated type is not. This because we need to
105+
// know how many times the casted type fits into the allocated type.
106+
// For the opposite case where the allocated type is scalable and the
107+
// cast type is not this leads to poor code quality due to the
108+
// introduction of 'vscale' into the calculations. It seems better to
109+
// bail out for this case too until we've done a proper cost-benefit
110+
// analysis.
111+
bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
112+
bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
113+
if (AllocIsScalable != CastIsScalable) return nullptr;
114+
115+
Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
116+
Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
117+
if (CastElTyAlign < AllocElTyAlign) return nullptr;
118+
119+
// If the allocation has multiple uses, only promote it if we are strictly
120+
// increasing the alignment of the resultant allocation. If we keep it the
121+
// same, we open the door to infinite loops of various kinds.
122+
if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
123+
124+
// The alloc and cast types should be either both fixed or both scalable.
125+
uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinValue();
126+
uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinValue();
127+
if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
128+
129+
// If the allocation has multiple uses, only promote it if we're not
130+
// shrinking the amount of memory being allocated.
131+
uint64_t AllocElTyStoreSize =
132+
DL.getTypeStoreSize(AllocElTy).getKnownMinValue();
133+
uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinValue();
134+
if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
135+
136+
// See if we can satisfy the modulus by pulling a scale out of the array
137+
// size argument.
138+
unsigned ArraySizeScale;
139+
uint64_t ArrayOffset;
140+
Value *NumElements = // See if the array size is a decomposable linear expr.
141+
decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
142+
143+
// If we can now satisfy the modulus, by using a non-1 scale, we really can
144+
// do the xform.
145+
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
146+
(AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr;
147+
148+
// We don't currently support arrays of scalable types.
149+
assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));
150+
151+
unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
152+
Value *Amt = nullptr;
153+
if (Scale == 1) {
154+
Amt = NumElements;
155+
} else {
156+
Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
157+
// Insert before the alloca, not before the cast.
158+
Amt = Builder.CreateMul(Amt, NumElements);
159+
}
160+
161+
if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
162+
Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
163+
Offset, true);
164+
Amt = Builder.CreateAdd(Amt, Off);
165+
}
166+
167+
AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
168+
New->setAlignment(AI.getAlign());
169+
New->takeName(&AI);
170+
New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
171+
New->setMetadata(LLVMContext::MD_DIAssignID,
172+
AI.getMetadata(LLVMContext::MD_DIAssignID));
173+
174+
replaceAllDbgUsesWith(AI, *New, *New, DT);
175+
176+
// If the allocation has multiple real uses, insert a cast and change all
177+
// things that used it to use the new cast. This will also hack on CI, but it
178+
// will die soon.
179+
if (!AI.hasOneUse()) {
180+
// New is the allocation instruction, pointer typed. AI is the original
181+
// allocation instruction, also pointer typed. Thus, cast to use is BitCast.
182+
Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
183+
replaceInstUsesWith(AI, NewCast);
184+
eraseInstFromFunction(AI);
185+
}
186+
return replaceInstUsesWith(CI, New);
187+
}
188+
#endif // INTEL_SYCL_OPAQUEPOINTER_READY
189+
28190
/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
29191
/// true for, actually insert the code to evaluate the expression.
30192
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
@@ -2634,9 +2796,18 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
26342796
return replaceInstUsesWith(CI, Src);
26352797

26362798
#ifndef INTEL_SYCL_OPAQUEPOINTER_READY
2637-
if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy))
2799+
if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
2800+
// If we are casting an alloca to a pointer to a type of the same
2801+
// size, rewrite the allocation instruction to allocate the "right" type.
2802+
// There is no need to modify malloc calls because it is their bitcast that
2803+
// needs to be cleaned up.
2804+
if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
2805+
if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
2806+
return V;
2807+
26382808
if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
26392809
return I;
2810+
}
26402811
#endif // INTEL_SYCL_OPAQUEPOINTER_READY
26412812

26422813
if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
642642

643643
Value *insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
644644
bool isSigned, bool Inside);
645+
#ifndef INTEL_SYCL_OPAQUEPOINTER_READY
646+
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
647+
#endif // INTEL_SYCL_OPAQUEPOINTER_READY
645648
bool mergeStoreIntoSuccessor(StoreInst &SI);
646649

647650
/// Given an initial instruction, check to see if it is the root of a
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; RUN: opt -opaque-pointers=0 -passes=instcombine -S %s -o - \
2+
; RUN: | FileCheck %s
3+
4+
;; NOTE: This test uses typed pointers because it is testing a code path that
5+
;; doesn't get exercised with opaque pointers. If/when PromoteCastOfAllocation
6+
;; is removed from visitBitCast this test should just be deleted.
7+
8+
;; Check that allocas generated in InstCombine's PromoteCastOfAllocation
9+
;; have DIAssignID copied from the original alloca.
10+
;;
11+
;; $ cat reduce.cpp
12+
;; struct c {
13+
;; c(int);
14+
;; int a, b;
15+
;; };
16+
;; c d() {
17+
;; c e(1);
18+
;; return e;
19+
;; }
20+
;; $ clang -O2 -c -g reduce.cpp -fno-inline -Xclang -disable-llvm-passes -emit-llvm -S \
21+
;; | opt -opaque-pointers=0 -passes=declare-to-assign -S
22+
23+
; CHECK: entry:
24+
; CHECK-NEXT: %retval = alloca i64, align 8, !DIAssignID ![[ID:[0-9]+]]
25+
; CHECK-NEXT: %tmpcast = bitcast i64* %retval to %struct.c*
26+
; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[e:[0-9]+]], metadata !DIExpression(), metadata ![[ID]], metadata i64* %retval, metadata !DIExpression()), !dbg
27+
; CHECK: ![[e]] = !DILocalVariable(name: "e",
28+
29+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
30+
31+
%struct.c = type { i32, i32 }
32+
33+
;; Test input: builds a %struct.c in a stack slot and returns its contents as
;; a single i64 by loading through a bitcast of the alloca.
define dso_local i64 @_Z1dv() !dbg !7 {
34+
entry:
35+
;; The alloca carries !DIAssignID !21, which the dbg.assign below refers to.
%retval = alloca %struct.c, align 4, !DIAssignID !21
36+
call void @llvm.dbg.assign(metadata i1 undef, metadata !20, metadata !DIExpression(), metadata !21, metadata %struct.c* %retval, metadata !DIExpression()), !dbg !22
37+
call void @_ZN1cC1Ei(%struct.c* %retval, i32 1), !dbg !23
38+
;; Bitcast of the alloca to i64*; the alloca also has other users above.
%0 = bitcast %struct.c* %retval to i64*, !dbg !24
39+
%1 = load i64, i64* %0, align 4, !dbg !24
40+
ret i64 %1, !dbg !24
41+
}
42+
43+
declare dso_local void @_ZN1cC1Ei(%struct.c*, i32) unnamed_addr
44+
declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)
45+
46+
!llvm.dbg.cu = !{!0}
47+
!llvm.module.flags = !{!3, !4, !5, !1000}
48+
!llvm.ident = !{!6}
49+
50+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
51+
!1 = !DIFile(filename: "reduce.cpp", directory: "/")
52+
!2 = !{}
53+
!3 = !{i32 7, !"Dwarf Version", i32 4}
54+
!4 = !{i32 2, !"Debug Info Version", i32 3}
55+
!5 = !{i32 1, !"wchar_size", i32 4}
56+
!6 = !{!"clang version 12.0.0"}
57+
!7 = distinct !DISubprogram(name: "d", linkageName: "_Z1dv", scope: !1, file: !1, line: 5, type: !8, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !19)
58+
!8 = !DISubroutineType(types: !9)
59+
!9 = !{!10}
60+
!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "c", file: !1, line: 1, size: 64, flags: DIFlagTypePassByValue | DIFlagNonTrivial, elements: !11, identifier: "_ZTS1c")
61+
!11 = !{!12, !14, !15}
62+
!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !10, file: !1, line: 3, baseType: !13, size: 32)
63+
!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
64+
!14 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !10, file: !1, line: 3, baseType: !13, size: 32, offset: 32)
65+
!15 = !DISubprogram(name: "c", scope: !10, file: !1, line: 2, type: !16, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
66+
!16 = !DISubroutineType(types: !17)
67+
!17 = !{null, !18, !13}
68+
!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
69+
!19 = !{!20}
70+
!20 = !DILocalVariable(name: "e", scope: !7, file: !1, line: 6, type: !10)
71+
!21 = distinct !DIAssignID()
72+
!22 = !DILocation(line: 0, scope: !7)
73+
!23 = !DILocation(line: 6, column: 5, scope: !7)
74+
!24 = !DILocation(line: 7, column: 3, scope: !7)
75+
!1000 = !{i32 7, !"debug-info-assignment-tracking", i1 true}

0 commit comments

Comments
 (0)