[InstCombine] Retain inbounds when canonicalising add+gep #72244

Closed
49 changes: 45 additions & 4 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2450,10 +2450,51 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // as:
       // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
       // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
-      auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(),
-                                       GEP.getPointerOperand(), Idx1);
-      return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
-                                       Idx2);
+      // If %gep is inbounds then %newgep can be inbounds only if %newptr is
+      // as well, as an inbounds gep requires the base pointer to be inbounds.
+      // We can mark %newptr as inbounds if we have a loop like
+      //   for (i = 0; ...)
+      //     ptr[i+x]
+      // If x is the same in each loop iteration then we know that we have a
+      // series of geps starting with ptr[x], which means that ptr[x] must be
+      // inbounds.
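
For illustration, a minimal IR sketch (mirroring @test1 in the new test file) of what this canonicalisation produces, assuming the inbounds reasoning above holds:

```llvm
; Before: one gep indexed by a loop-varying add, with %i an induction
; variable starting at 0 and %k loop-invariant.
%add = add i32 %i, %k
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add

; After: the loop-invariant index feeds the inner gep, and with this patch
; both geps keep the inbounds flag (previously both lost it).
%newptr = getelementptr inbounds i8, ptr %A, i32 %k
%arrayidx = getelementptr inbounds i8, ptr %newptr, i32 %i
```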
Contributor:

This reasoning is not correct. Violating inbounds will result in a poison return value, not immediate UB. As such, you can't do inductive reasoning along the lines of "It was inbounds on the first iteration, and each iteration advances it by an inbounds amount". For example, you could have the case that the result of the GEP is only actually used on a single iteration, in which case it doesn't matter whether all the others are poison or not.

Take this variant of your first test: https://alive2.llvm.org/ce/z/fR9p5X It replaces the store to the pointer (which will convert poison into UB) with a call to a function. The alive2 counter-example is that the pointer is null, and k is -1. On the first iteration you have gep inbounds (null, -1) which is poison. On the second you have gep inbounds (null, 1-1) which is not. After the transform you have gep inbounds (gep inbounds (null, 1), -1) which is poison.
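
A minimal sketch of that counter-example in IR (hedged: the precise test is at the alive2 link; @use here is an illustrative declaration standing in for the call):

```llvm
declare void @use(ptr)

define void @src(i32 %N, i32 %k, ptr %A) {
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp ult i32 %i, %N
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ; With %A = null and %k = -1: i = 0 gives gep inbounds (null, -1), which
  ; is poison; i = 1 gives gep inbounds (null, 0), which is null, not
  ; poison. Passing poison to a call is not itself UB, so the poison
  ; iterations are not converted into UB the way the original store was.
  %add = add i32 %i, %k
  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
  call void @use(ptr %arrayidx)
  %inc = add i32 %i, 1
  br label %for.cond

for.end:
  ret void
}

; After the transform, iteration i = 1 instead computes
; gep inbounds (gep inbounds (null, 1), -1), which is poison: the transform
; introduces poison on an iteration that previously had none.
```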

To salvage that approach, I think you would have to require that a) the GEP being poison implies UB and b) the GEP is executed on each loop iteration.


For your original motivating case for this patch, is the (non-IV) add operand known to be non-negative by chance? The usual way we would preserve inbounds in a transform like this is to check that both add operands are non-negative. We can prove that for the IV, but I'm not sure whether the information exists for the other operand in the cases that you care about.
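
A sketch of that usual condition (an illustration, not code from this patch): if both add operands are known non-negative, every partial offset lies between zero and the full offset, so the intermediate gep stays within any object that made the original gep inbounds.

```llvm
; Assume %i >= 0 and %k >= 0 are both known, e.g. proved by value tracking
; (isKnownNonNegative). Then 0 <= %k <= %i + %k, so any object for which
; %A + (%i + %k) is in bounds also contains %A + %k.
%add = add i32 %i, %k
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add

; => can be split while keeping inbounds on both geps:
%newptr = getelementptr inbounds i8, ptr %A, i32 %k
%arrayidx2 = getelementptr inbounds i8, ptr %newptr, i32 %i
```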

Collaborator (author):


I've spent some time trying to implement something that works given the problems you've highlighted, but I didn't get very far. My reason for doing this was that loop flattening was no longer able to flatten any loops due to the missing inbounds qualifier. Instead, I've decided to implement loop versioning in loop flattening (currently a TODO), as that lets it cope with the lack of inbounds; that work is now #78576.

+      auto CheckIdx = [&](Value *LoopIdx, Value *FixedIdx) {
+        // Check that LoopIdx is a loop induction variable that starts at 0.
+        auto *PHI = dyn_cast<PHINode>(LoopIdx);
+        BinaryOperator *BO;
+        Value *Start, *End;
+        if (!PHI || !matchSimpleRecurrence(PHI, BO, Start, End) ||
+            !match(Start, m_Zero()))
+          return false;
+        // If FixedIdx dominates the phi then it's the same in each loop
+        // iteration.
+        if (DT.dominates(FixedIdx, PHI))
+          return true;
+        // If FixedIdx is a binary expression of values that dominate the phi
+        // then it's the same in each loop iteration.
+        Value *Left, *Right;
+        if (match(FixedIdx, m_BinOp(m_Value(Left), m_Value(Right))) &&
+            DT.dominates(Left, PHI) && DT.dominates(Right, PHI))
+          return true;
+        // We can't handle anything else.
+        return false;
+      };
+      bool InBounds = false;
+      if (GEP.isInBounds()) {
+        if (CheckIdx(Idx2, Idx1)) {
+          InBounds = true;
+        } else if (CheckIdx(Idx1, Idx2)) {
+          std::swap(Idx1, Idx2);

Contributor:

Why do we have to swap the indices here?


InBounds = true;
}
}
auto *NewPtr =
Builder.CreateGEP(GEP.getResultElementType(), GEP.getPointerOperand(),
Idx1, "", InBounds);
auto *NewGEP =
GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr, Idx2);
NewGEP->setIsInBounds(InBounds);
return NewGEP;
}
ConstantInt *C;
if (match(GEP.getOperand(1), m_OneUse(m_SExt(m_OneUse(m_NSWAdd(
Expand Down
228 changes: 228 additions & 0 deletions llvm/test/Transforms/InstCombine/add-gep.ll
@@ -0,0 +1,228 @@
; RUN: opt < %s -S -passes=instcombine | FileCheck %s

Contributor:

Please use update_test_checks.py to generate check lines.



target datalayout = "e-p:32:32"

; CHECK-LABEL: @test1
define void @test1(i32 %N, i32 %k, ptr %A) {
entry:
br label %for.cond

for.cond:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp ult i32 %i, %N
br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: for.body:
; CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %k
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %i
for.body:
%add = add i32 %i, %k
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
store i8 1, ptr %arrayidx, align 4
%inc = add i32 %i, 1
br label %for.cond

for.end:
ret void
}

; CHECK-LABEL: @test2
define void @test2(i32 %N, i32 %k, ptr %A) {
entry:
br label %for.cond

for.cond:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp ult i32 %i, %N
br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: for.body:
; CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %mul
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %i
for.body:
%mul = mul i32 %k, 42
%add = add i32 %i, %mul
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
store i8 1, ptr %arrayidx, align 4
%inc = add i32 %i, 1
br label %for.cond

for.end:
ret void
}

; CHECK-LABEL: @test3
define void @test3(i32 %N, ptr %A, i32 %val) {
entry:
br label %for.cond

for.cond:
%i = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 ]
%cmp = icmp ult i32 %i, %N
br i1 %cmp, label %for.body, label %for.end7

for.body:
br label %for.cond1

for.cond1:
%j = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
%cmp2 = icmp ult i32 %j, %N
br i1 %cmp2, label %for.body3, label %for.inc5

; CHECK-LABEL: for.body3:
; CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %i
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %j
for.body3:
%add = add i32 %i, %j
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
store i8 1, ptr %arrayidx, align 4
%inc = add i32 %j, 1
br label %for.cond1

for.inc5:
%inc6 = add i32 %i, 1
br label %for.cond

for.end7:
ret void
}

; CHECK-LABEL: @test4
define void @test4(i32 %N, ptr %A, i32 %val) {
entry:
br label %for.cond

for.cond:
%i = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 ]
%cmp = icmp ult i32 %i, %N
br i1 %cmp, label %for.body, label %for.end7

for.body:
br label %for.cond1

for.cond1:
%j = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
%cmp2 = icmp ult i32 %j, %N
br i1 %cmp2, label %for.body3, label %for.inc5

; CHECK-LABEL: for.body3:
; CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %mul
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %j
for.body3:
%mul = mul i32 %i, %N
%add = add i32 %mul, %j
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
store i8 1, ptr %arrayidx, align 4
%inc = add i32 %j, 1
br label %for.cond1

for.inc5:
%inc6 = add i32 %i, 1
br label %for.cond

for.end7:
ret void
}

; We can't use inbounds here because the add operand doesn't dominate the loop
; CHECK-LABEL: @test5
define void @test5(i32 %N, ptr %A, ptr %B) {
entry:
br label %for.cond

for.cond:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp ult i32 %i, %N
br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: for.body:
; CHECK: [[GEP:%.*]] = getelementptr i8, ptr %A, i32 %i
; CHECK-NEXT: %arrayidx = getelementptr i8, ptr [[GEP]], i32 %0
for.body:
%0 = load i32, ptr %B, align 4
%add = add i32 %i, %0
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
store i8 1, ptr %arrayidx, align 4
%inc = add i32 %i, 1
br label %for.cond

for.end:
ret void
}

; We can't use inbounds here because we don't have a loop
; CHECK-LABEL: @test6
define void @test6(i32 %k, i32 %j, ptr %A) {
entry:
%cmp = icmp ugt i32 %k, 10
br i1 %cmp, label %if.then, label %if.else

if.then:
br label %if.end

if.else:
br label %if.end

; CHECK-LABEL: if.end:
; CHECK: [[GEP:%.*]] = getelementptr i8, ptr %A, i32 %val
; CHECK-NEXT: %arrayidx = getelementptr i8, ptr [[GEP]], i32 %j
if.end:
%val = phi i32 [ 0, %if.then ], [ 1, %if.else ]
%add = add i32 %val, %j
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
store i8 1, ptr %arrayidx, align 4
ret void
}

; An inbounds gep would be invalid because of the potential overflow in the
; add. We don't convert to gep+gep here anyway: the i16 index is widened with
; an explicit sext rather than being used directly as the gep offset.
; CHECK-LABEL: @test7
define void @test7(i16 %N, i16 %k, ptr %A) {
entry:
br label %for.cond

for.cond:
%i = phi i16 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp ult i16 %i, %N
br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: for.body:
; CHECK: %add = add i16 %i, %k
; CHECK-NEXT: [[SEXT:%.*]] = sext i16 %add to i32
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr %A, i32 [[SEXT]]
for.body:
%add = add i16 %i, %k
%arrayidx = getelementptr inbounds i8, ptr %A, i16 %add
store i8 1, ptr %arrayidx, align 4
%inc = add i16 %i, 1
br label %for.cond

for.end:
ret void
}

; %i starts at 1 so we can't use inbounds
; CHECK-LABEL: @test8
define void @test8(i32 %N, i32 %k, ptr %A) {
entry:
br label %for.cond

for.cond:
%i = phi i32 [ 1, %entry ], [ %inc, %for.body ]
%cmp = icmp ult i32 %i, %N
br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: for.body:
; CHECK: [[GEP:%.*]] = getelementptr i8, ptr %A, i32 %i
; CHECK-NEXT: %arrayidx = getelementptr i8, ptr [[GEP]], i32 %k
for.body:
%add = add i32 %i, %k
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
store i8 1, ptr %arrayidx, align 4
%inc = add i32 %i, 1
br label %for.cond

for.end:
ret void
}
4 changes: 2 additions & 2 deletions llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
@@ -23,8 +23,8 @@ define void @_Z4testPcl(ptr %out, i64 %size) {
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[OUT:%.*]], i64 [[I_0]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[I_0]]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[SIZE]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[OUT]], i64 [[SIZE]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[I_0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]
 ; CHECK-NEXT:    store i16 [[TMP1]], ptr [[ARRAYIDX]], align 1, !llvm.access.group [[ACC_GRP0]]
 ; CHECK-NEXT:    br label [[FOR_INC]]
24 changes: 12 additions & 12 deletions llvm/test/Transforms/LoopVectorize/induction.ll
@@ -348,11 +348,11 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; IND:       vector.body:
 ; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IND-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; IND-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
+; IND-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
+; IND-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
 ; IND-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
-; IND-NEXT:    [[TMP7:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; IND-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP7]], i64 [[OFFSET2]]
+; IND-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
+; IND-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[INDEX]]
 ; IND-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP8]], align 4, !alias.scope [[META7]]
 ; IND-NEXT:    [[TMP9:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
 ; IND-NEXT:    [[TMP10:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP9]]
@@ -408,13 +408,13 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
+; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
+; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
 ; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
 ; UNROLL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 2
 ; UNROLL-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP7]], align 4, !alias.scope [[META4]], !noalias [[META7]]
-; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; UNROLL-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[OFFSET2]]
+; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
+; UNROLL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[INDEX]]
 ; UNROLL-NEXT:    [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP9]], align 4, !alias.scope [[META7]]
 ; UNROLL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 2
 ; UNROLL-NEXT:    [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP10]], align 4, !alias.scope [[META7]]
@@ -551,13 +551,13 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
+; INTERLEAVE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
+; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
 ; INTERLEAVE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP7]], align 4, !alias.scope [[META4]], !noalias [[META7]]
-; INTERLEAVE-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; INTERLEAVE-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[OFFSET2]]
+; INTERLEAVE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
+; INTERLEAVE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[INDEX]]
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope [[META7]]
 ; INTERLEAVE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 4
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP10]], align 4, !alias.scope [[META7]]