CodeGenPrep: preserve inbounds attribute when sinking GEPs.

TNorthover · TNorthover · commit 8935aca9c74b · 2019-03-12T15:22:23.000Z
Targets can potentially emit more efficient code if they know address
computations never overflow. For example, ILP32 code on AArch64 (which only has
64-bit address computation) can ignore the possibility of overflow with this
extra information.

llvm-svn: 355926
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2073,6 +2073,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
   Value *BaseReg = nullptr;
   Value *ScaledReg = nullptr;
   Value *OriginalValue = nullptr;
+  bool InBounds = true;
 
   enum FieldName {
     NoField        = 0x00,
@@ -2084,6 +2085,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
     MultipleFields = 0xff
   };
 
+
   ExtAddrMode() = default;
 
   void print(raw_ostream &OS) const;
@@ -2102,6 +2104,10 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
         ScaledReg->getType() != other.ScaledReg->getType())
       return MultipleFields;
 
+    // Conservatively reject 'inbounds' mismatches.
+    if (InBounds != other.InBounds)
+      return MultipleFields;
+
     // Check each field to see if it differs.
     unsigned Result = NoField;
     if (BaseReg != other.BaseReg)
@@ -2200,6 +2206,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
 void ExtAddrMode::print(raw_ostream &OS) const {
   bool NeedPlus = false;
   OS << "[";
+  if (InBounds)
+    OS << "inbounds ";
   if (BaseGV) {
     OS << (NeedPlus ? " + " : "")
        << "GV:";
@@ -3498,6 +3506,7 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
   ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
   if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+    TestAddrMode.InBounds = false;
     TestAddrMode.ScaledReg = AddLHS;
     TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
 
@@ -4072,6 +4081,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
         TPT.getRestorationPoint();
 
+    AddrMode.InBounds = false;
     if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
         matchAddr(AddrInst->getOperand(0), Depth+1))
       return true;
@@ -4098,6 +4108,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
   case Instruction::Mul:
   case Instruction::Shl: {
     // Can only handle X*C and X << C.
+    AddrMode.InBounds = false;
     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
     if (!RHS || RHS->getBitWidth() > 64)
       return false;
@@ -4149,8 +4160,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
       if (ConstantOffset == 0 ||
           TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
         // Check to see if we can fold the base pointer in too.
-        if (matchAddr(AddrInst->getOperand(0), Depth+1))
+        if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
+          if (!cast<GEPOperator>(AddrInst)->isInBounds())
+            AddrMode.InBounds = false;
           return true;
+        }
       } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
                  TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
                  ConstantOffset > 0) {
@@ -4186,6 +4200,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
 
     // See if the scale and offset amount is valid for this target.
     AddrMode.BaseOffs += ConstantOffset;
+    if (!cast<GEPOperator>(AddrInst)->isInBounds())
+      AddrMode.InBounds = false;
 
     // Match the base operand of the GEP.
     if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
@@ -4871,7 +4887,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
           // SDAG consecutive load/store merging.
           if (ResultPtr->getType() != I8PtrTy)
             ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
-          ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+          ResultPtr =
+              AddrMode.InBounds
+                  ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+                                              "sunkaddr")
+                  : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
         }
 
         ResultIndex = V;
@@ -4882,7 +4902,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       } else {
         if (ResultPtr->getType() != I8PtrTy)
           ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
-        SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+        SunkAddr =
+            AddrMode.InBounds
+                ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+                                            "sunkaddr")
+                : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
       }
 
       if (SunkAddr->getType() != Addr->getType())
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -492,7 +492,7 @@ done:
 %struct.foo = type { [3 x float], [3 x float] }
 
 ; OPT-LABEL: @sink_ds_address(
-; OPT: getelementptr i8,
+; OPT: getelementptr inbounds i8,
 
 ; GCN-LABEL: {{^}}sink_ds_address:
 ; GCN: s_load_dword [[SREG1:s[0-9]+]],
diff --git a/llvm/test/CodeGen/Thumb/addr-modes.ll b/llvm/test/CodeGen/Thumb/addr-modes.ll
@@ -14,7 +14,7 @@ target triple = "thumbv6m-arm-none-eabi"
 
 ; Test case 01: %n is scaled by 4 (size of i32).
 ; Expected: GEP cannot be folded into LOAD.
-; CHECK: local addrmode: [Base:%arrayidx]
+; CHECK: local addrmode: [inbounds Base:%arrayidx]
 define i32 @load01(i32* %p, i32 %n) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %p, i32 %n
@@ -24,7 +24,7 @@ entry:
 
 ; Test case 02: No scale of %n is needed because the size of i8 is 1.
 ; Expected: GEP can be folded into LOAD.
-; CHECK: local addrmode: [Base:%p + 1*%n]
+; CHECK: local addrmode: [inbounds Base:%p + 1*%n]
 define i8 @load02(i8* %p, i32 %n) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n
diff --git a/llvm/test/Transforms/CodeGenPrepare/Mips/pr35209.ll b/llvm/test/Transforms/CodeGenPrepare/Mips/pr35209.ll
@@ -54,7 +54,7 @@ cl:                                               ; preds = %sw.bb, %entry
 ; CHECK-NOT: %{{[0-9]+}}  = load %struct.bt*, %struct.bt** %bw
 
 ; CHECK: %[[I1:[0-9]+]] = bitcast %struct.az* %[[I0]] to i8*
-; CHECK-NEXT: %sunkaddr = getelementptr i8, i8* %[[I1]], i64 8
+; CHECK-NEXT: %sunkaddr = getelementptr inbounds i8, i8* %[[I1]], i64 8
 ; CHECK-NEXT: %[[I2:[0-9]+]] = bitcast i8* %sunkaddr to %struct.bt**
 ; CHECK-NEXT: %{{[0-9]+}} = load %struct.bt*, %struct.bt** %[[I2]]
 ; CHECK-NEXT: tail call void (i8*, ...) @a
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll b/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll
@@ -219,7 +219,7 @@ define void @nophi(i32* %p) {
 ; CHECK-NEXT:    br label [[INDIRECTGOTO]]
 ; CHECK:       indirectgoto:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to i8*
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, i8* [[TMP0]], i64 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SUNKADDR]] to i32*
 ; CHECK-NEXT:    [[NEWP:%.*]] = load i32, i32* [[TMP1]], align 4
 ; CHECK-NEXT:    [[IDX:%.*]] = sext i32 [[NEWP]] to i64
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
@@ -41,7 +41,7 @@ if.then:
   br label %fallthrough
 
 fallthrough:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %b = phi i64* [%b1, %entry], [%b2, %if.then]
   %c = phi i32* [%c1, %entry], [%c2, %if.then]
   %v = load i32, i32* %c, align 4
@@ -111,7 +111,7 @@ if.then:
   br label %fallthrough
 
 fallthrough:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %b = phi i64* [%b1, %entry], [%b2, %if.then]
   %c = phi i32* [%c1, %entry], [%c2, %if.then]
   %v = load i32, i32* %c, align 4
@@ -199,7 +199,7 @@ if.then:
   br label %fallthrough
 
 fallthrough:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %c = phi i32* [%c3, %loop], [%c2, %if.then]
   %b = phi i64* [%b3, %loop], [%b2, %if.then]
   %v = load volatile i32, i32* %c, align 4
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; Can we sink single addressing mode computation to use?
 define void @test1(i1 %cond, i64* %base) {
 ; CHECK-LABEL: @test1
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
 entry:
   %addr = getelementptr inbounds i64, i64* %base, i64 5
   %casted = bitcast i64* %addr to i32*
@@ -35,15 +35,15 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
   br i1 %cmp, label %next, label %fallthrough
 
 next:
 ; CHECK-LABEL: next:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v2 = load i32, i32* %casted, align 4
   call void @foo(i32 %v2)
   br label %fallthrough
@@ -63,10 +63,10 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
-; CHECK-NOT: getelementptr i8, {{.+}} 40
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
   %v2 = load i32, i32* %casted, align 4
   call void @foo(i32 %v2)
   br label %fallthrough
@@ -86,7 +86,7 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
@@ -97,7 +97,7 @@ fallthrough:
 
 rare.1:
 ; CHECK-LABEL: rare.1:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   call void @slowpath(i32 %v1, i32* %casted) cold
   br label %fallthrough
 }
@@ -106,14 +106,14 @@ rare.1:
 define void @test5(i1 %cond, i64* %base) {
 ; CHECK-LABEL: @test5
 entry:
-; CHECK: %addr = getelementptr
+; CHECK: %addr = getelementptr inbounds
   %addr = getelementptr inbounds i64, i64* %base, i64 5
   %casted = bitcast i64* %addr to i32*
   br i1 %cond, label %if.then, label %fallthrough
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK-NOT: getelementptr i8, {{.+}} 40
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
@@ -138,7 +138,7 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK-NOT: getelementptr i8, {{.+}} 40
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
@@ -164,15 +164,15 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
   br i1 %cmp, label %rare.1, label %next
 
 next:
 ; CHECK-LABEL: next:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v2 = load i32, i32* %casted, align 4
   call void @foo(i32 %v2)
   %cmp2 = icmp eq i32 %v2, 0
@@ -183,13 +183,13 @@ fallthrough:
 
 rare.1:
 ; CHECK-LABEL: rare.1:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   call void @slowpath(i32 %v1, i32* %casted) cold
   br label %next
 
 rare.2:
 ; CHECK-LABEL: rare.2:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   call void @slowpath(i32 %v2, i32* %casted) cold
   br label %fallthrough
 }
@@ -240,7 +240,7 @@ if.then:
 
 backedge:
 ; CHECK-LABEL: backedge:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
   %v = load i32, i32* %casted.merged, align 4
   call void @foo(i32 %v)
@@ -256,7 +256,7 @@ exit:
 ; address computation.
 define void @test10(i1 %cond, i64* %base) {
 ; CHECK-LABEL: @test10
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
 ; CHECK-NOT: select
 entry:
   %gep1 = getelementptr inbounds i64, i64* %base, i64 5
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK-LABEL: @load_cast_gep
 ; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
-; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 40
+; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 40
 define void @load_cast_gep(i1 %cond, i64* %base) {
 entry:
   %addr = getelementptr inbounds i64, i64* %base, i64 5
@@ -23,7 +23,7 @@ fallthrough:
 
 ; CHECK-LABEL: @store_gep_cast
 ; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
-; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 20
+; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 20
 define void @store_gep_cast(i1 %cond, i64* %base) {
 entry:
   %casted = addrspacecast i64* %base to i32 addrspace(1)*