Skip to content

[DirectX] Array GEPs need two indices #142853

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 5, 2025
Merged

Conversation

farzonl
Copy link
Member

@farzonl farzonl commented Jun 4, 2025

partially fixes #142836

  • Update DXILFlattenArrays.cpp GEPs to use two indices since they are array GEPs
  • Update flatten test cases
  • This change reduces dxv bitcast validation errors by 364 (Total now is 1070x)
  • This change reduces dxv out of bounds validation errors by 124 (Total is now 24)
  • We are also able to successfully compile 4 more shaders

@llvmbot
Copy link
Member

llvmbot commented Jun 4, 2025

@llvm/pr-subscribers-backend-directx

Author: Farzon Lotfi (farzonl)

Changes

partially fixes #142836

  • Update DXILFlattenArrays.cpp GEPs to use two indices since they are array GEPs
  • Update flatten test cases
  • This change reduces dxv bitcast validation errors by 364 (Total now is 1070x)
  • This change reduces dxv out of bounds validation errors by 124 (Total is now 24)
  • We are also able to successfully compile 4 more shaders

Full diff: https://github.com/llvm/llvm-project/pull/142853.diff

5 Files Affected:

  • (modified) llvm/lib/Target/DirectX/DXILFlattenArrays.cpp (+3-2)
  • (modified) llvm/test/CodeGen/DirectX/flatten-array.ll (+9-11)
  • (modified) llvm/test/CodeGen/DirectX/flatten-bug-117273.ll (+3-3)
  • (modified) llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll (+47-51)
  • (modified) llvm/test/CodeGen/DirectX/scalar-bug-117273.ll (+3-3)
diff --git a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
index a3163a8969642..a98ec01d9fd4b 100644
--- a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
+++ b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
@@ -272,8 +272,9 @@ bool DXILFlattenArraysVisitor::visitGetElementPtrInstInGEPChainBase(
 
   ArrayType *FlattenedArrayType = GEPInfo.ParentArrayType;
   Value *FlatGEP =
-      Builder.CreateGEP(FlattenedArrayType, GEPInfo.ParendOperand, FlatIndex,
-                        GEP.getName() + ".flat", GEP.isInBounds());
+      Builder.CreateGEP(FlattenedArrayType, GEPInfo.ParendOperand,
+                        {Builder.getInt32(0), FlatIndex},
+                        GEP.getName() + ".flat", GEP.getNoWrapFlags());
 
   GEP.replaceAllUsesWith(FlatGEP);
   GEP.eraseFromParent();
diff --git a/llvm/test/CodeGen/DirectX/flatten-array.ll b/llvm/test/CodeGen/DirectX/flatten-array.ll
index 754d5a25ca905..a3ca39cc8e1d2 100644
--- a/llvm/test/CodeGen/DirectX/flatten-array.ll
+++ b/llvm/test/CodeGen/DirectX/flatten-array.ll
@@ -31,7 +31,7 @@ define void @alloca_4d_test ()  {
 ; CHECK-LABEL: gep_2d_test
 define void @gep_2d_test ()  {
     ; CHECK: [[a:%.*]] = alloca [9 x i32], align 4
-    ; CHECK-COUNT-9: getelementptr inbounds [9 x i32], ptr [[a]], i32 {{[0-8]}}
+    ; CHECK-COUNT-9: getelementptr inbounds [9 x i32], ptr [[a]], i32 0, i32 {{[0-8]}}
     ; CHECK-NEXT:    ret void
     %1 = alloca [3 x [3 x i32]], align 4
     %g2d0 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 0
@@ -53,7 +53,7 @@ define void @gep_2d_test ()  {
 ; CHECK-LABEL: gep_3d_test
 define void @gep_3d_test ()  {
     ; CHECK: [[a:%.*]] = alloca [8 x i32], align 4
-    ; CHECK-COUNT-8: getelementptr inbounds [8 x i32], ptr [[a]], i32 {{[0-7]}}
+    ; CHECK-COUNT-8: getelementptr inbounds [8 x i32], ptr [[a]], i32 0, i32 {{[0-7]}}
     ; CHECK-NEXT:    ret void
     %1 = alloca [2 x[2 x [2 x i32]]], align 4
     %g3d0 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %1, i32 0, i32 0
@@ -76,7 +76,7 @@ define void @gep_3d_test ()  {
 ; CHECK-LABEL: gep_4d_test
 define void @gep_4d_test ()  {
     ; CHECK: [[a:%.*]] = alloca [16 x i32], align 4
-    ; CHECK-COUNT-16: getelementptr inbounds [16 x i32], ptr [[a]], i32 {{[0-9]|1[0-5]}}
+    ; CHECK-COUNT-16: getelementptr inbounds [16 x i32], ptr [[a]], i32 0, i32 {{[0-9]|1[0-5]}}
     ; CHECK-NEXT:    ret void
     %1 = alloca [2x[2 x[2 x [2 x i32]]]], align 4
     %g4d0 = getelementptr inbounds [2x[2 x[2 x [2 x i32]]]], [2x[2 x[2 x [2 x i32]]]]* %1, i32 0, i32 0
@@ -123,8 +123,7 @@ define void @gep_4d_test ()  {
 @b = internal global [2 x [3 x [4 x i32]]] zeroinitializer, align 16
 
 define void @global_gep_load() {
-  ; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 6
-  ; CHECK: load i32, ptr [[GEP_PTR]], align 4
+  ; CHECK: load i32, ptr getelementptr inbounds ([24 x i32], ptr @a.1dim, i32 0, i32 6), align 4
   ; CHECK-NEXT:    ret void
   %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 0
   %2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 1
@@ -142,11 +141,11 @@ define void @global_gep_load_index(i32 %row, i32 %col, i32 %timeIndex) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[ROW]], 12
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP4]], [[TMP5]]
-; CHECK-NEXT:    [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP6]]
+; CHECK-NEXT:    %.flat = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 0, i32 %6
 ; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}}
 ; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}}
 ; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}}
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTFLAT]], align 4
+; CHECK-NEXT:    %7 = load i32, ptr %.flat, align 4
 ; CHECK-NEXT:    ret void
 ;
   %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 %row
@@ -163,11 +162,11 @@ define void @global_incomplete_gep_chain(i32 %row, i32 %col) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[ROW]], 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP4]]
+; CHECK-NEXT:    %.flat = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 0, i32 %4
 ; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}}
 ; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}}
 ; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}}
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTFLAT]], align 4
+; CHECK-NEXT:    %5 = load i32, ptr %.flat, align 4
 ; CHECK-NEXT:    ret void
 ;
   %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 %row
@@ -177,8 +176,7 @@ define void @global_incomplete_gep_chain(i32 %row, i32 %col) {
 }
 
 define void @global_gep_store() {
-  ; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @b.1dim, i32 13
-  ; CHECK:  store i32 1, ptr [[GEP_PTR]], align 4
+  ; CHECK: store i32 1, ptr getelementptr inbounds ([24 x i32], ptr @b.1dim, i32 0, i32 13), align 4
   ; CHECK-NEXT:    ret void
   %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @b, i32 0, i32 1
   %2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 0
diff --git a/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
index 3ae5832ce8322..8ff8229031c87 100644
--- a/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
+++ b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
@@ -7,10 +7,10 @@
 
 define internal void @main() {
 ; CHECK-LABEL: define internal void @main() {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 1
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 0, i32 1
 ; CHECK-NEXT:    [[DOTI0:%.*]] = load float, ptr [[TMP0]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 0, i32 2
 ; CHECK-NEXT:    [[DOTI03:%.*]] = load float, ptr [[TMP1]], align 16
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 7e5a92e1311f8..46b75368745a2 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -32,23 +32,23 @@ define <4 x i32> @load_array_vec_test() #0 {
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3) to ptr addrspace(3)
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1) to ptr addrspace(3)
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
-; CHECK-NEXT:    [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
-; CHECK-NEXT:    [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
-; CHECK-NEXT:    [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
-; CHECK-NEXT:    [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
-; CHECK-NEXT:    [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
-; CHECK-NEXT:    [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
-; CHECK-NEXT:    [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
-; CHECK-NEXT:    [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i32 0
-; CHECK-NEXT:    [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i32 1
-; CHECK-NEXT:    [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i32 2
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i32 3
-; CHECK-NEXT:    ret <4 x i32> [[TMP16]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(3) [[TMP9]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), i32 1) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), i32 2) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(3) [[TMP13]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), i32 3) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(3) [[TMP15]], align 4
+; CHECK-NEXT: [[DOTI05:%.*]] = add i32 [[TMP2]], [[TMP10]]
+; CHECK-NEXT: [[DOTI16:%.*]] = add i32 [[TMP4]], [[TMP12]]
+; CHECK-NEXT: [[DOTI27:%.*]] = add i32 [[TMP6]], [[TMP14]]
+; CHECK-NEXT: [[DOTI38:%.*]] = add i32 [[TMP8]], [[TMP16]]
+; CHECK-NEXT: [[DOTUPTO01215:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI05]], i32 0
+; CHECK-NEXT: [[DOTUPTO11316:%.*]] = insertelement <4 x i32> [[DOTUPTO01215]], i32 [[DOTI16]], i32 1
+; CHECK-NEXT: [[DOTUPTO21417:%.*]] = insertelement <4 x i32> [[DOTUPTO11316]], i32 [[DOTI27]], i32 2
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[DOTUPTO21417]], i32 [[DOTI38]], i32 3
+; CHECK-NEXT:    ret <4 x i32> [[TMP17]]
 ;
   %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
   %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
@@ -81,23 +81,19 @@ define <4 x i32> @load_vec_test() #0 {
 define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
 ; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test(
 ; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[DOTFLAT:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
-; CHECK-NEXT:    [[DOTFLAT_I1:%.*]] = getelementptr i32, ptr [[TMP3]], i32 1
-; CHECK-NEXT:    [[DOTI1:%.*]] = load i32, ptr [[DOTFLAT_I1]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
-; CHECK-NEXT:    [[DOTFLAT_I2:%.*]] = getelementptr i32, ptr [[TMP4]], i32 2
-; CHECK-NEXT:    [[DOTI2:%.*]] = load i32, ptr [[DOTFLAT_I2]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
-; CHECK-NEXT:    [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[TMP5]], i32 3
-; CHECK-NEXT:    [[DOTI3:%.*]] = load i32, ptr [[DOTFLAT_I3]], align 4
-; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
-; CHECK-NEXT:    [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i32 1
-; CHECK-NEXT:    [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[DOTI2]], i32 2
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[DOTI3]], i32 3
-; CHECK-NEXT:    ret <4 x i32> [[TMP6]]
+; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[INDEX]]
+; CHECK-NEXT: [[DOTI0:%.*]] = load i32, ptr [[DOTFLAT]], align 4
+; CHECK-NEXT: [[DOTFLAT_I1:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 1
+; CHECK-NEXT: [[DOTI1:%.*]] = load i32, ptr [[DOTFLAT_I1]], align 4
+; CHECK-NEXT: [[DOTFLAT_I2:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 2
+; CHECK-NEXT: [[DOTI2:%.*]] = load i32, ptr [[DOTFLAT_I2]], align 4
+; CHECK-NEXT: [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 3
+; CHECK-NEXT: [[DOTI3:%.*]] = load i32, ptr [[DOTFLAT_I3]], align 4
+; CHECK-NEXT: [[DOTUPTO01:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT: [[DOTUPTO12:%.*]] = insertelement <4 x i32> [[DOTUPTO01]], i32 [[DOTI1]], i32 1
+; CHECK-NEXT: [[DOTUPTO23:%.*]] = insertelement <4 x i32> [[DOTUPTO12]], i32 [[DOTI2]], i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[DOTUPTO23]], i32 [[DOTI3]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
 ;
   %3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
   %4 = load <4 x i32>, <4 x i32>* %3, align 4
@@ -115,23 +111,23 @@ define <4 x i32> @multid_load_test() #0 {
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3)
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3)
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
-; CHECK-NEXT:    [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
-; CHECK-NEXT:    [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
-; CHECK-NEXT:    [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
-; CHECK-NEXT:    [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
-; CHECK-NEXT:    [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
-; CHECK-NEXT:    [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
-; CHECK-NEXT:    [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
-; CHECK-NEXT:    [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i32 0
-; CHECK-NEXT:    [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i32 1
-; CHECK-NEXT:    [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i32 2
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i32 3
-; CHECK-NEXT:    ret <4 x i32> [[TMP16]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(3) [[TMP9]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 1) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 2) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(3) [[TMP13]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 3) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(3) [[TMP15]], align 4
+; CHECK-NEXT: [[DOTI05:%.*]] = add i32 [[TMP2]], [[TMP10]]
+; CHECK-NEXT: [[DOTI16:%.*]] = add i32 [[TMP4]], [[TMP12]]
+; CHECK-NEXT: [[DOTI27:%.*]] = add i32 [[TMP6]], [[TMP14]]
+; CHECK-NEXT: [[DOTI38:%.*]] = add i32 [[TMP8]], [[TMP16]]
+; CHECK-NEXT: [[DOTUPTO01215:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI05]], i32 0
+; CHECK-NEXT: [[DOTUPTO11316:%.*]] = insertelement <4 x i32> [[DOTUPTO01215]], i32 [[DOTI16]], i32 1
+; CHECK-NEXT: [[DOTUPTO21417:%.*]] = insertelement <4 x i32> [[DOTUPTO11316]], i32 [[DOTI27]], i32 2
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[DOTUPTO21417]], i32 [[DOTI38]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP17]]
 ;
   %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
   %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
diff --git a/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
index 2676abec1d8ae..1c2f2ac796609 100644
--- a/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
@@ -7,14 +7,14 @@
 ; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
 define internal void @main() #1 {
 ; CHECK-LABEL: define internal void @main() {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [24 x float], ptr @StaticArr.scalarized.1dim, i32 1
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [24 x float], ptr @StaticArr.scalarized.1dim, i32 0, i32 1
 ; CHECK-NEXT:    [[DOTI0:%.*]] = load float, ptr [[TMP0]], align 16
 ; CHECK-NEXT:    [[DOTI1:%.*]] = getelementptr float, ptr [[TMP0]], i32 1
 ; CHECK-NEXT:    [[DOTI11:%.*]] = load float, ptr [[DOTI1]], align 4
 ; CHECK-NEXT:    [[DOTI2:%.*]] = getelementptr float, ptr [[TMP0]], i32 2
 ; CHECK-NEXT:    [[DOTI22:%.*]] = load float, ptr [[DOTI2]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [24 x float], ptr @StaticArr.scalarized.1dim, i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [24 x float], ptr @StaticArr.scalarized.1dim, i32 0, i32 2
 ; CHECK-NEXT:    [[DOTI03:%.*]] = load float, ptr [[TMP1]], align 16
 ; CHECK-NEXT:    [[DOTI14:%.*]] = getelementptr float, ptr [[TMP1]], i32 1
 ; CHECK-NEXT:    [[DOTI15:%.*]] = load float, ptr [[DOTI14]], align 4

@farzonl farzonl marked this pull request as draft June 4, 2025 21:04
@farzonl farzonl force-pushed the bugfix/issue-142836 branch from ae290e7 to 4ac3a63 Compare June 4, 2025 21:18
@farzonl farzonl marked this pull request as ready for review June 4, 2025 21:20
partially fixes 142836
- Update DXILFlattenArrays.cpp GEPs to use two indices since they are
  array GEPs
- Update flatten test cases
- This change reduces dxv bitcast validation errors by 364 (Total now is
  1070x)
- This change reduces dxv out of bounds validation errors by 124 (Total
  is now 24)
- We are also able to successfully compile 4 more shaders
@farzonl farzonl force-pushed the bugfix/issue-142836 branch from 4ac3a63 to 5441c2d Compare June 4, 2025 22:29
@farzonl farzonl merged commit 9ab4c16 into llvm:main Jun 5, 2025
12 checks passed
farzonl added a commit to farzonl/llvm-project that referenced this pull request Jun 5, 2025
farzonl added a commit to farzonl/llvm-project that referenced this pull request Jun 6, 2025
farzonl added a commit that referenced this pull request Jun 6, 2025
…bit cast instruction filter for DXIL Prepare pass (#142678)" (#143043)

- This reverts commit 9ab4c16.
- This reverts commit 1d6e8ec.

Noticed a really weird behavior where release and debug builds have
different codegen for loads with geps after this PR. This is going to
take a minute to debug and figure out why so revert seems to make the
most sense.

```diff
diff --git a/llvm/test/CodeGen/DirectX/flatten-array.ll b/llvm/test/CodeGen/DirectX/flatten-array.ll
index 47d7b50..efa9efe 100644
--- a/llvm/test/CodeGen/DirectX/flatten-array.ll
+++ b/llvm/test/CodeGen/DirectX/flatten-array.ll
@@ -123,7 +123,8 @@ define void @gep_4d_test ()  {
@b = internal global [2 x [3 x [4 x i32]]] zeroinitializer, align 16
define void @global_gep_load() {
-  ; CHECK: load i32, ptr getelementptr inbounds ([24 x i32], ptr @a.1dim, i32 0, i32 6), align 4
+  ; CHECK: %1 = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 0, i32 6
+  ; CHECK-NEXT: %2 = load i32, ptr %1, align 4
   ; CHECK-NEXT:    ret void
   %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @A, i32 0, i32 0
   %2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 1
@@ -176,7 +177,8 @@ define void @global_incomplete_gep_chain(i32 %row, i32 %col) {
}
define void @global_gep_store() {
-  ; CHECK: store i32 1, ptr getelementptr inbounds ([24 x i32], ptr @b.1dim, i32 0, i32 13), align 4
+  ; CHECK: %1 = getelementptr inbounds [24 x i32], ptr @b.1dim, i32 0, i32 13
+  ; CHECK-NEXT: store i32 1, ptr %1, align 4
   ; CHECK-NEXT:    ret void
```
rorth pushed a commit to rorth/llvm-project that referenced this pull request Jun 11, 2025
partially fixes llvm#142836
- Update DXILFlattenArrays.cpp GEPs to use two indices since they are
array GEPs
- Update flatten test cases
- This change reduces dxv bitcast validation errors by 364 (Total now is
1070x)
- This change reduces dxv out of bounds validation errors by 124 (Total
is now 24)
- We are also able to successfully compile 4 more shaders
rorth pushed a commit to rorth/llvm-project that referenced this pull request Jun 11, 2025
…ust bit cast instruction filter for DXIL Prepare pass (llvm#142678)" (llvm#143043)

- This reverts commit 9ab4c16.
- This reverts commit 1d6e8ec.

Noticed a really weird behavior where release and debug builds have
different codegen for loads with geps after this PR. This is going to
take a minute to debug and figure out why so revert seems to make the
most sense.

```diff
diff --git a/llvm/test/CodeGen/DirectX/flatten-array.ll b/llvm/test/CodeGen/DirectX/flatten-array.ll
index 47d7b50..efa9efe 100644
--- a/llvm/test/CodeGen/DirectX/flatten-array.ll
+++ b/llvm/test/CodeGen/DirectX/flatten-array.ll
@@ -123,7 +123,8 @@ define void @gep_4d_test ()  {
@b = internal global [2 x [3 x [4 x i32]]] zeroinitializer, align 16
define void @global_gep_load() {
-  ; CHECK: load i32, ptr getelementptr inbounds ([24 x i32], ptr @a.1dim, i32 0, i32 6), align 4
+  ; CHECK: %1 = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 0, i32 6
+  ; CHECK-NEXT: %2 = load i32, ptr %1, align 4
   ; CHECK-NEXT:    ret void
   %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @A, i32 0, i32 0
   %2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 1
@@ -176,7 +177,8 @@ define void @global_incomplete_gep_chain(i32 %row, i32 %col) {
}
define void @global_gep_store() {
-  ; CHECK: store i32 1, ptr getelementptr inbounds ([24 x i32], ptr @b.1dim, i32 0, i32 13), align 4
+  ; CHECK: %1 = getelementptr inbounds [24 x i32], ptr @b.1dim, i32 0, i32 13
+  ; CHECK-NEXT: store i32 1, ptr %1, align 4
   ; CHECK-NEXT:    ret void
```
farzonl added a commit to farzonl/llvm-project that referenced this pull request Jun 11, 2025
partially fixes llvm#142836
- Update DXILFlattenArrays.cpp GEPs to use two indices since they are
array GEPs
- Update flatten test cases
- This change reduces dxv bitcast validation errors by 364 (Total now is
1070x)
- This change reduces dxv out of bounds validation errors by 124 (Total
is now 24)
- We are also able to successfully compile 4 more shaders
DhruvSrivastavaX pushed a commit to DhruvSrivastavaX/lldb-for-aix that referenced this pull request Jun 12, 2025
partially fixes llvm#142836
- Update DXILFlattenArrays.cpp GEPs to use two indices since they are
array GEPs
- Update flatten test cases
- This change reduces dxv bitcast validation errors by 364 (Total now is
1070x)
- This change reduces dxv out of bounds validation errors by 124 (Total
is now 24)
- We are also able to successfully compile 4 more shaders
DhruvSrivastavaX pushed a commit to DhruvSrivastavaX/lldb-for-aix that referenced this pull request Jun 12, 2025
…ust bit cast instruction filter for DXIL Prepare pass (llvm#142678)" (llvm#143043)

- This reverts commit 9ab4c16.
- This reverts commit 1d6e8ec.

Noticed a really weird behavior where release and debug builds have
different codegen for loads with geps after this PR. This is going to
take a minute to debug and figure out why so revert seems to make the
most sense.

```diff
diff --git a/llvm/test/CodeGen/DirectX/flatten-array.ll b/llvm/test/CodeGen/DirectX/flatten-array.ll
index 47d7b50..efa9efe 100644
--- a/llvm/test/CodeGen/DirectX/flatten-array.ll
+++ b/llvm/test/CodeGen/DirectX/flatten-array.ll
@@ -123,7 +123,8 @@ define void @gep_4d_test ()  {
@b = internal global [2 x [3 x [4 x i32]]] zeroinitializer, align 16
define void @global_gep_load() {
-  ; CHECK: load i32, ptr getelementptr inbounds ([24 x i32], ptr @a.1dim, i32 0, i32 6), align 4
+  ; CHECK: %1 = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 0, i32 6
+  ; CHECK-NEXT: %2 = load i32, ptr %1, align 4
   ; CHECK-NEXT:    ret void
   %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @A, i32 0, i32 0
   %2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 1
@@ -176,7 +177,8 @@ define void @global_incomplete_gep_chain(i32 %row, i32 %col) {
}
define void @global_gep_store() {
-  ; CHECK: store i32 1, ptr getelementptr inbounds ([24 x i32], ptr @b.1dim, i32 0, i32 13), align 4
+  ; CHECK: %1 = getelementptr inbounds [24 x i32], ptr @b.1dim, i32 0, i32 13
+  ; CHECK-NEXT: store i32 1, ptr %1, align 4
   ; CHECK-NEXT:    ret void
```
farzonl added a commit that referenced this pull request Jun 12, 2025
This change relands #142853.
It fixes the circular reference issue we were seeing in GEPs, e.g.
`%.flat = getelementptr inbounds [16 x i32], ptr %.flat, i32 0, i32 15`
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Jun 12, 2025
This change relands  llvm/llvm-project#142853
It fixes the circular reference issue we were seeing in GEPs
ex `%.flat = getelementptr inbounds [16 x i32], ptr %.flat, i32 0, i32
15`
tomtor pushed a commit to tomtor/llvm-project that referenced this pull request Jun 14, 2025
This change relands  llvm#142853
It fixes the circular reference issue we were seeing in GEPs
ex `%.flat = getelementptr inbounds [16 x i32], ptr %.flat, i32 0, i32
15`
akuhlens pushed a commit to akuhlens/llvm-project that referenced this pull request Jun 24, 2025
This change relands  llvm#142853
It fixes the circular reference issue we were seeing in GEPs
ex `%.flat = getelementptr inbounds [16 x i32], ptr %.flat, i32 0, i32
15`
@farzonl farzonl self-assigned this Jun 27, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[DirectX] GEPs to multi-dimensional arrays are incorrectly flattened
3 participants