
Commit 9e5a77f

[SeparateConstOffsetFromGEP] Always emit i8 gep
Always emit canonical i8 GEPs; don't try to preserve the original element type. As this is a backend pass, trying to preserve the type is not useful.
1 parent: 08da7ac
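As a sketch of the effect (names are illustrative, not from the commit): where the pass previously re-expressed an extracted constant offset in units of the GEP's element type, it now always emits the byte-addressed i8 form, e.g. for a one-element offset into float data:

; before: offset in element units
%new.gep = getelementptr inbounds float, ptr %p, i64 1
; after: canonical i8 GEP, offset in bytes (1 * sizeof(float) = 4)
%new.gep = getelementptr inbounds i8, ptr %p, i64 4

Both forms compute the same address; only the encoding of the offset changes.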

File tree

9 files changed: +93 −135 lines changed


llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Lines changed: 8 additions & 50 deletions
@@ -1093,67 +1093,25 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // => add the offset
   //
   // %gep2 ; clone of %gep
-  // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+  // %new.gep = gep i8, %gep2, %offset
   // %gep ; will be removed
   // ... %gep ...
   //
   // => replace all uses of %gep with %new.gep and remove %gep
   //
   // %gep2 ; clone of %gep
-  // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
-  // ... %new.gep ...
-  //
-  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
-  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
-  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
-  // type of %gep.
-  //
-  // %gep2 ; clone of %gep
-  // %0 = bitcast %gep2 to i8*
-  // %uglygep = gep %0, <offset>
-  // %new.gep = bitcast %uglygep to <type of %gep>
+  // %new.gep = gep i8, %gep2, %offset
   // ... %new.gep ...
   Instruction *NewGEP = GEP->clone();
   NewGEP->insertBefore(GEP);
 
-  // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned =
-  // unsigned.. Therefore, we cast ElementTypeSizeOfGEP to signed because it is
-  // used with unsigned integers later.
-  int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
-      DL->getTypeAllocSize(GEP->getResultElementType()));
   Type *PtrIdxTy = DL->getIndexType(GEP->getType());
-  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
-    // Very likely. As long as %gep is naturally aligned, the byte offset we
-    // extracted should be a multiple of sizeof(*%gep).
-    int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
-    NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
-                                       ConstantInt::get(PtrIdxTy, Index, true),
-                                       GEP->getName(), GEP);
-    NewGEP->copyMetadata(*GEP);
-    // Inherit the inbounds attribute of the original GEP.
-    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
-  } else {
-    // Unlikely but possible. For example,
-    //  #pragma pack(1)
-    //  struct S {
-    //    int a[3];
-    //    int64 b[8];
-    //  };
-    //  #pragma pack()
-    //
-    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
-    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
-    // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
-    // sizeof(int64).
-    //
-    // Emit an uglygep in this case.
-    IRBuilder<> Builder(GEP);
-    NewGEP = cast<Instruction>(Builder.CreateGEP(
-        Builder.getInt8Ty(), NewGEP,
-        {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep",
-        GEPWasInBounds));
-    NewGEP->copyMetadata(*GEP);
-  }
+  IRBuilder<> Builder(GEP);
+  NewGEP = cast<Instruction>(Builder.CreateGEP(
+      Builder.getInt8Ty(), NewGEP,
+      {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)},
+      GEP->getName(), GEPWasInBounds));
+  NewGEP->copyMetadata(*GEP);
 
   GEP->replaceAllUsesWith(NewGEP);
   GEP->eraseFromParent();
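This removes the distinction between the aligned case and the old "uglygep" path: the packed-struct example from the deleted comment, where AccumulativeByteOffset = 100 is not a multiple of sizeof(int64), now yields the same canonical form as every other case (a sketch, reusing %gep2 from the comment above):

; byte offset used directly, no divisibility requirement
%new.gep = getelementptr i8, ptr %gep2, i64 100

The single CreateGEP call also passes GEPWasInBounds, so the original GEP's inbounds flag is still inherited, which the deleted branch did explicitly via setIsInBounds.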

llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@ define ptr @test1(ptr %base, i64 %idx) #0 {
 define ptr @test2(ptr %base, i64 %idx) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 3, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 1
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
 ; CHECK-NEXT: ret ptr [[GEP2]]
 ;
   %idx.next = add nuw nsw i64 %idx, 1
@@ -57,7 +57,7 @@ define ptr @test4(ptr %base, i64 %idx) {
 define ptr @test5(ptr %base, i64 %idx) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 1, i64 3, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 1
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
 ; CHECK-NEXT: ret ptr [[GEP2]]
 ;
   %idx.next = add nuw nsw i64 %idx, 1
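Here and in the test updates below, the new constants follow mechanically from the byte-based form: new offset = old index * element size. For the float GEPs above, an index of 1 becomes 1 * 4 = 4 bytes; in the AMDGPU tests that follow, 32 becomes 128, 255 becomes 1020, and an index of 3 into <4 x i32> elements becomes 3 * 16 = 48.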

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll

Lines changed: 8 additions & 8 deletions
@@ -11,9 +11,9 @@ define amdgpu_kernel void @sum_of_array(i32 %x, i32 %y, ptr addrspace(1) nocaptu
 ; IR-NEXT: [[TMP:%.*]] = sext i32 [[Y]] to i64
 ; IR-NEXT: [[TMP1:%.*]] = sext i32 [[X]] to i64
 ; IR-NEXT: [[TMP2:%.*]] = getelementptr [4096 x [32 x float]], ptr addrspace(4) @array, i64 0, i64 [[TMP1]], i64 [[TMP]]
-; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 1
-; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 32
-; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 33
+; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 4
+; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 128
+; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 132
 ; IR-NEXT: store float 0.000000e+00, ptr addrspace(1) [[OUTPUT]], align 4
 ; IR-NEXT: ret void
 ;
@@ -51,7 +51,7 @@ define amdgpu_kernel void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, pt
 ; IR-NEXT: [[TMP2:%.*]] = getelementptr [4096 x [4 x float]], ptr addrspace(4) @array2, i64 0, i64 [[TMP1]], i64 [[TMP]]
 ; IR-NEXT: [[TMP6:%.*]] = add i32 [[Y]], 255
 ; IR-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
-; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 255
+; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 1020
 ; IR-NEXT: [[TMP12:%.*]] = add i32 [[X]], 256
 ; IR-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
 ; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4096 x [4 x float]], ptr addrspace(4) @array2, i64 0, i64 [[TMP13]], i64 [[TMP]]
@@ -91,13 +91,13 @@ define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y
 ; IR-NEXT: [[TMP2:%.*]] = getelementptr [4096 x [4 x float]], ptr addrspace(3) @lds_array, i32 0, i32 [[X]], i32 [[Y]]
 ; IR-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(3) [[TMP2]], align 4
 ; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP2]], i32 255
+; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 1020
 ; IR-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(3) [[TMP82]], align 4
 ; IR-NEXT: [[TMP11:%.*]] = fadd float [[TMP5]], [[TMP10]]
-; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP2]], i32 16128
+; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 64512
 ; IR-NEXT: [[TMP16:%.*]] = load float, ptr addrspace(3) [[TMP144]], align 4
 ; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP11]], [[TMP16]]
-; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP2]], i32 16383
+; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 65532
 ; IR-NEXT: [[TMP20:%.*]] = load float, ptr addrspace(3) [[TMP187]], align 4
 ; IR-NEXT: [[TMP21:%.*]] = fadd float [[TMP17]], [[TMP20]]
 ; IR-NEXT: store float [[TMP21]], ptr addrspace(1) [[OUTPUT]], align 4
@@ -134,7 +134,7 @@ define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float,
 ; IR-NEXT: [[TMP27:%.*]] = shl i32 [[TMP23]], 2
 ; IR-NEXT: [[TMP28:%.*]] = sext i32 [[TMP27]] to i64
 ; IR-NEXT: [[TMP29:%.*]] = getelementptr [0 x <4 x i32>], ptr addrspace(4) [[TMP1]], i64 0, i64 [[TMP28]], !amdgpu.uniform [[META0]]
-; IR-NEXT: [[TMP30:%.*]] = getelementptr <4 x i32>, ptr addrspace(4) [[TMP29]], i64 3, !amdgpu.uniform [[META0]]
+; IR-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP29]], i64 48, !amdgpu.uniform [[META0]]
 ; IR-NEXT: [[TMP31:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP30]], align 16, !invariant.load [[META0]]
 ; IR-NEXT: [[TMP32:%.*]] = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> [[TMP26]], <4 x i32> [[TMP31]], i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #[[ATTR3]]
 ; IR-NEXT: [[TMP33:%.*]] = extractelement <4 x float> [[TMP32]], i32 0
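One detail worth noting: in the addrspace(3) hunk above the new byte offsets are i32 rather than i64. The pass takes the offset type from DL->getIndexType(GEP->getType()), and AMDGPU's local (LDS) address space uses a 32-bit index type.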

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn.ll

Lines changed: 13 additions & 13 deletions
@@ -26,15 +26,15 @@ define void @sum_of_array(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
 ; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
 ; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
-; IR-NEXT: [[I87:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 1
+; IR-NEXT: [[I87:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
 ; IR-NEXT: [[I9:%.*]] = addrspacecast ptr addrspace(3) [[I87]] to ptr
 ; IR-NEXT: [[I10:%.*]] = load float, ptr [[I9]], align 4
 ; IR-NEXT: [[I11:%.*]] = fadd float [[I5]], [[I10]]
-; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 32
+; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
 ; IR-NEXT: [[I15:%.*]] = addrspacecast ptr addrspace(3) [[I1412]] to ptr
 ; IR-NEXT: [[I16:%.*]] = load float, ptr [[I15]], align 4
 ; IR-NEXT: [[I17:%.*]] = fadd float [[I11]], [[I16]]
-; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 33
+; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
 ; IR-NEXT: [[I19:%.*]] = addrspacecast ptr addrspace(3) [[I1818]] to ptr
 ; IR-NEXT: [[I20:%.*]] = load float, ptr [[I19]], align 4
 ; IR-NEXT: [[I21:%.*]] = fadd float [[I17]], [[I20]]
@@ -88,15 +88,15 @@ define void @sum_of_array2(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
 ; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
 ; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
-; IR-NEXT: [[I77:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 1
+; IR-NEXT: [[I77:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
 ; IR-NEXT: [[I8:%.*]] = addrspacecast ptr addrspace(3) [[I77]] to ptr
 ; IR-NEXT: [[I9:%.*]] = load float, ptr [[I8]], align 4
 ; IR-NEXT: [[I10:%.*]] = fadd float [[I5]], [[I9]]
-; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 32
+; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
 ; IR-NEXT: [[I13:%.*]] = addrspacecast ptr addrspace(3) [[I1212]] to ptr
 ; IR-NEXT: [[I14:%.*]] = load float, ptr [[I13]], align 4
 ; IR-NEXT: [[I15:%.*]] = fadd float [[I10]], [[I14]]
-; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 33
+; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
 ; IR-NEXT: [[I17:%.*]] = addrspacecast ptr addrspace(3) [[I1618]] to ptr
 ; IR-NEXT: [[I18:%.*]] = load float, ptr [[I17]], align 4
 ; IR-NEXT: [[I19:%.*]] = fadd float [[I15]], [[I18]]
@@ -149,15 +149,15 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
 ; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
 ; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
-; IR-NEXT: [[I87:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 1
+; IR-NEXT: [[I87:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
 ; IR-NEXT: [[I9:%.*]] = addrspacecast ptr addrspace(3) [[I87]] to ptr
 ; IR-NEXT: [[I10:%.*]] = load float, ptr [[I9]], align 4
 ; IR-NEXT: [[I11:%.*]] = fadd float [[I5]], [[I10]]
-; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 32
+; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
 ; IR-NEXT: [[I15:%.*]] = addrspacecast ptr addrspace(3) [[I1412]] to ptr
 ; IR-NEXT: [[I16:%.*]] = load float, ptr [[I15]], align 4
 ; IR-NEXT: [[I17:%.*]] = fadd float [[I11]], [[I16]]
-; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 33
+; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
 ; IR-NEXT: [[I19:%.*]] = addrspacecast ptr addrspace(3) [[I1818]] to ptr
 ; IR-NEXT: [[I20:%.*]] = load float, ptr [[I19]], align 4
 ; IR-NEXT: [[I21:%.*]] = fadd float [[I17]], [[I20]]
@@ -209,15 +209,15 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
 ; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
 ; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
-; IR-NEXT: [[I77:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 1
+; IR-NEXT: [[I77:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
 ; IR-NEXT: [[I8:%.*]] = addrspacecast ptr addrspace(3) [[I77]] to ptr
 ; IR-NEXT: [[I9:%.*]] = load float, ptr [[I8]], align 4
 ; IR-NEXT: [[I10:%.*]] = fadd float [[I5]], [[I9]]
-; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 32
+; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
 ; IR-NEXT: [[I13:%.*]] = addrspacecast ptr addrspace(3) [[I1212]] to ptr
 ; IR-NEXT: [[I14:%.*]] = load float, ptr [[I13]], align 4
 ; IR-NEXT: [[I15:%.*]] = fadd float [[I10]], [[I14]]
-; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 33
+; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
 ; IR-NEXT: [[I17:%.*]] = addrspacecast ptr addrspace(3) [[I1618]] to ptr
 ; IR-NEXT: [[I18:%.*]] = load float, ptr [[I17]], align 4
 ; IR-NEXT: [[I19:%.*]] = fadd float [[I15]], [[I18]]
@@ -270,7 +270,7 @@ define void @reunion(i32 %x, i32 %y, ptr %input) {
 ; IR-NEXT: [[P0:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[I]]
 ; IR-NEXT: [[V0:%.*]] = load float, ptr [[P0]], align 4
 ; IR-NEXT: call void @use(float [[V0]])
-; IR-NEXT: [[P13:%.*]] = getelementptr inbounds float, ptr [[P0]], i64 5
+; IR-NEXT: [[P13:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 20
 ; IR-NEXT: [[V1:%.*]] = load float, ptr [[P13]], align 4
 ; IR-NEXT: call void @use(float [[V1]])
 ; IR-NEXT: ret void
