Skip to content

Commit 1d6e8ec

Browse files
authored
Adjust bit cast instruction filter for DXIL Prepare pass (llvm#142678)
This PR addresses a specific edge case when deciding whether or not to produce a bitcast instruction. Specifically, when the given operand is a global array, the element type of the array wasn't correctly compared to the return type. In that case, if the types are equal, a bitcast shouldn't be created, but one was. This PR checks whether the element type of the array is the same as the return type and, if it is, doesn't create a bitcast instruction. Fixes llvm#139013.
1 parent b58b3e1 commit 1d6e8ec

File tree

3 files changed

+121
-40
lines changed

3 files changed

+121
-40
lines changed

llvm/lib/Target/DirectX/DXILPrepare.cpp

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,49 @@ class DXILPrepareModule : public ModulePass {
148148
Type *Ty) {
149149
// Omit bitcasts if the incoming value matches the instruction type.
150150
auto It = PointerTypes.find(Operand);
151-
if (It != PointerTypes.end())
152-
if (cast<TypedPointerType>(It->second)->getElementType() == Ty)
151+
if (It != PointerTypes.end()) {
152+
auto *OpTy = cast<TypedPointerType>(It->second)->getElementType();
153+
if (OpTy == Ty)
153154
return nullptr;
155+
}
156+
157+
Type *ValTy = Operand->getType();
158+
// Also omit the bitcast for matching global array types
159+
if (auto *GlobalVar = dyn_cast<GlobalVariable>(Operand))
160+
ValTy = GlobalVar->getValueType();
161+
162+
if (auto *AI = dyn_cast<AllocaInst>(Operand))
163+
ValTy = AI->getAllocatedType();
164+
165+
if (auto *ArrTy = dyn_cast<ArrayType>(ValTy)) {
166+
Type *ElTy = ArrTy->getElementType();
167+
if (ElTy == Ty)
168+
return nullptr;
169+
}
170+
171+
// Finally, drill down through GEP constant expressions until we reach the
172+
// array that is being accessed, and compare element types.
173+
if (ConstantExpr *GEPInstr = dyn_cast<ConstantExpr>(Operand)) {
174+
while (GEPInstr->getOpcode() == Instruction::GetElementPtr) {
175+
Value *OpArg = GEPInstr->getOperand(0);
176+
if (ConstantExpr *NewGEPInstr = dyn_cast<ConstantExpr>(OpArg)) {
177+
GEPInstr = NewGEPInstr;
178+
continue;
179+
}
180+
181+
if (auto *GlobalVar = dyn_cast<GlobalVariable>(OpArg))
182+
ValTy = GlobalVar->getValueType();
183+
if (auto *AI = dyn_cast<AllocaInst>(Operand))
184+
ValTy = AI->getAllocatedType();
185+
if (auto *ArrTy = dyn_cast<ArrayType>(ValTy)) {
186+
Type *ElTy = ArrTy->getElementType();
187+
if (ElTy == Ty)
188+
return nullptr;
189+
}
190+
break;
191+
}
192+
}
193+
154194
// Insert bitcasts where we are removing the instruction.
155195
Builder.SetInsertPoint(&Inst);
156196
// This code only gets hit in opaque-pointer mode, so the type of the

llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,15 @@ define <4 x i32> @load_array_vec_test() #0 {
6060
define <4 x i32> @load_vec_test() #0 {
6161
; CHECK-LABEL: define <4 x i32> @load_vec_test(
6262
; CHECK-SAME: ) #[[ATTR0]] {
63-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @vecData.scalarized to ptr addrspace(3)
64-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
65-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 1) to ptr addrspace(3)
66-
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
67-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 2) to ptr addrspace(3)
68-
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
69-
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3) to ptr addrspace(3)
70-
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
71-
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
72-
; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP4]], i32 1
73-
; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP6]], i32 2
74-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[TMP8]], i32 3
75-
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
63+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @vecData.scalarized, align 4
64+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 1), align 4
65+
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 2), align 4
66+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3), align 4
67+
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
68+
; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP2]], i32 1
69+
; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP3]], i32 2
70+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[TMP4]], i32 3
71+
; CHECK-NEXT: ret <4 x i32> [[TMP5]]
7672
;
7773
%1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
7874
ret <4 x i32> %1
@@ -103,31 +99,23 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
10399
define <4 x i32> @multid_load_test() #0 {
104100
; CHECK-LABEL: define <4 x i32> @multid_load_test(
105101
; CHECK-SAME: ) #[[ATTR0]] {
106-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3)
107-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
108-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3)
109-
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
110-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 2) to ptr addrspace(3)
111-
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
112-
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3)
113-
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
114-
; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1) to ptr addrspace(3)
115-
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(3) [[TMP9]], align 4
116-
; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 1) to ptr addrspace(3)
117-
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
118-
; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 2) to ptr addrspace(3)
119-
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(3) [[TMP13]], align 4
120-
; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 3) to ptr addrspace(3)
121-
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(3) [[TMP15]], align 4
122-
; CHECK-NEXT: [[DOTI05:%.*]] = add i32 [[TMP2]], [[TMP10]]
123-
; CHECK-NEXT: [[DOTI16:%.*]] = add i32 [[TMP4]], [[TMP12]]
124-
; CHECK-NEXT: [[DOTI27:%.*]] = add i32 [[TMP6]], [[TMP14]]
125-
; CHECK-NEXT: [[DOTI38:%.*]] = add i32 [[TMP8]], [[TMP16]]
126-
; CHECK-NEXT: [[DOTUPTO01215:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI05]], i32 0
127-
; CHECK-NEXT: [[DOTUPTO11316:%.*]] = insertelement <4 x i32> [[DOTUPTO01215]], i32 [[DOTI16]], i32 1
128-
; CHECK-NEXT: [[DOTUPTO21417:%.*]] = insertelement <4 x i32> [[DOTUPTO11316]], i32 [[DOTI27]], i32 2
129-
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[DOTUPTO21417]], i32 [[DOTI38]], i32 3
130-
; CHECK-NEXT: ret <4 x i32> [[TMP17]]
102+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, align 4
103+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), align 4
104+
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 2), align 4
105+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3), align 4
106+
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), align 4
107+
; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 1), align 4
108+
; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 2), align 4
109+
; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 3), align 4
110+
; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP1]], [[TMP5]]
111+
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP2]], [[DOTI13]]
112+
; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP3]], [[DOTI25]]
113+
; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP4]], [[DOTI37]]
114+
; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i32 0
115+
; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i32 1
116+
; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i32 2
117+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i32 3
118+
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
131119
;
132120
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
133121
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
; RUN: opt -S --dxil-prepare %s | FileCheck %s
2+
3+
; Test that global arrays do not get a bitcast instruction
4+
; after the dxil-prepare pass.
5+
6+
target triple = "dxilv1.2-unknown-shadermodel6.2-compute"
7+
8+
@inputTile.1dim = local_unnamed_addr addrspace(3) global [3 x float] zeroinitializer, align 2
9+
10+
; CHECK-LABEL: testload
11+
define float @testload() local_unnamed_addr {
12+
; NOTE: this would be "bitcast ptr addrspace(3)..." before the change that introduced this test,
13+
; after the dxil-prepare pass is run
14+
; CHECK-NEXT: load float, ptr addrspace(3) @inputTile.1dim, align 2
15+
%v = load float, ptr addrspace(3) @inputTile.1dim, align 2
16+
17+
ret float %v
18+
}
19+
20+
; CHECK-LABEL: teststore
21+
define void @teststore() local_unnamed_addr {
22+
; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) @inputTile.1dim, align 2
23+
store float 2.000000e+00, ptr addrspace(3) @inputTile.1dim, align 2
24+
25+
ret void
26+
}
27+
28+
; CHECK-LABEL: testGEPConst
29+
define float @testGEPConst() local_unnamed_addr {
30+
; CHECK-NEXT: load float, ptr addrspace(3) getelementptr (float, ptr addrspace(3) @inputTile.1dim, i32 1), align 4
31+
%v = load float, ptr addrspace(3) getelementptr (float, ptr addrspace(3) @inputTile.1dim, i32 1), align 4
32+
33+
ret float %v
34+
}
35+
36+
; CHECK-LABEL: testGEPNonConst
37+
define float @testGEPNonConst(i32 %i) local_unnamed_addr {
38+
; CHECK-NEXT: getelementptr float, ptr addrspace(3) @inputTile.1dim, i32 %i
39+
%gep = getelementptr float, ptr addrspace(3) @inputTile.1dim, i32 %i
40+
%v = load float, ptr addrspace(3) %gep
41+
42+
ret float %v
43+
}
44+
45+
; CHECK-LABEL: testAlloca
46+
define float @testAlloca(i32 %i) local_unnamed_addr {
47+
; CHECK-NEXT: alloca [3 x float], align 4
48+
%arr = alloca [3 x float], align 4
49+
; CHECK-NEXT: getelementptr [3 x float], ptr %arr, i32 1
50+
%gep = getelementptr [3 x float], ptr %arr, i32 1
51+
%v = load float, ptr %gep
52+
ret float %v
53+
}

0 commit comments

Comments
 (0)