
Commit 6ec7658 ("comments")

1 parent: 4c9b94d

File tree: 6 files changed, +101 -40 lines

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 8 additions & 22 deletions
@@ -2020,7 +2020,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
 /// \param TM TargetMachine (TODO: remove once DL contains nullptr values)
 /// \param AS Target Address Space
 /// \return true if \p V cannot be the null value of \p AS, false otherwise.
-static bool isPtrKnownNeverNull(Value *V, const DataLayout &DL,
+static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
                                 const AMDGPUTargetMachine &TM, unsigned AS) {
   // Pointer cannot be null if it's a block address, GV or alloca.
   // NOTE: We don't support extern_weak, but if we did, we'd need to check for
@@ -2048,6 +2048,10 @@ static bool isPtrKnownNeverNull(Value *V, const DataLayout &DL,
 }
 
 bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
+  // Intrinsic doesn't support vectors. Also, it seems difficult to prove that a vector cannot have any null elements, so it's unclear whether it's worth supporting.
+  if (I.getType()->isVectorTy())
+    return false;
+
   // Check if this can be lowered to a amdgcn.addrspacecast.nonnull.
   // This is only worthwhile for casts from/to priv/local to flat.
   const unsigned SrcAS = I.getSrcAddressSpace();
@@ -2063,28 +2067,10 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
   if (!CanLower)
     return false;
 
-  // Check the Src operand, looking through any PHIs.
-  SmallVector<Value *, 4> WorkList;
-  DenseSet<const PHINode *> SeenPHIs;
-  WorkList.push_back(I.getOperand(0));
-  while (!WorkList.empty()) {
-    Value *Cur = getUnderlyingObject(WorkList.pop_back_val());
-
-    // Look through PHIs - add all incoming values to the queue.
-    if (const auto *Phi = dyn_cast<PHINode>(Cur)) {
-      auto [It, Inserted] = SeenPHIs.insert(Phi);
-      if (!Inserted)
-        return false; // infinite recursion
-
-      for (auto &Inc : Phi->incoming_values())
-        WorkList.push_back(Inc.get());
-      continue;
-    }
-
-    if (isPtrKnownNeverNull(Cur, *DL, *TM, SrcAS))
-      continue;
+  SmallVector<const Value *, 4> WorkList;
+  getUnderlyingObjects(I.getOperand(0), WorkList);
+  if (!all_of(WorkList, [&](const Value *V) { return isPtrKnownNeverNull(V, *DL, *TM, SrcAS); }))
     return false;
-  }
 
   IRBuilder<> B(&I);
   auto *Intrin = B.CreateIntrinsic(
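For context, the replacement leans on `getUnderlyingObjects`, which already walks through GEPs, casts, selects and PHIs (with internal cycle detection and a bounded lookup depth), so the hand-rolled worklist and `SeenPHIs` set became unnecessary. A minimal sketch of the pattern, assuming standard LLVM headers; the helper name `allUnderlyingObjectsSatisfy` is illustrative and not part of the commit:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Collects the base objects reachable from Ptr (looking through GEPs,
// casts, selects and PHIs; PHI cycles are handled internally) and checks
// that every one of them satisfies Pred. This mirrors the new
// getUnderlyingObjects + all_of pattern in visitAddrSpaceCastInst.
static bool allUnderlyingObjectsSatisfy(
    const Value *Ptr, function_ref<bool(const Value *)> Pred) {
  SmallVector<const Value *, 4> Objects;
  getUnderlyingObjects(Ptr, Objects);
  return all_of(Objects, Pred);
}

This is also why the `recursive_phis` test below now folds to the nonnull intrinsic: where the old loop gave up as soon as it revisited a PHI, `getUnderlyingObjects` resolves the cycle down to the actual base objects, which the predicate can then prove non-null.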

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
@@ -2272,7 +2272,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
   // For llvm.amdgcn.addrspacecast.nonnull we can always assume non-null, for
   // G_ADDRSPACE_CAST we need to guess.
   const bool IsKnownNonNull =
-      isa<GIntrinsic>(MI) ? true : isKnownNonNull(Src, MRI, TM, SrcAS);
+      isa<GIntrinsic>(MI) || isKnownNonNull(Src, MRI, TM, SrcAS);
 
   if (TM.isNoopAddrSpaceCast(SrcAS, DestAS)) {
     MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST));
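A side note on the simplified predicate: for booleans, `A ? true : B` and `A || B` are equivalent, and the disjunction also short-circuits the `isKnownNonNull` query whenever the instruction is already the nonnull intrinsic. A throwaway exhaustive check of the equivalence (illustrative only, not commit code):

#include <cassert>

int main() {
  // Both forms agree on all four input combinations.
  for (bool A : {false, true})
    for (bool B : {false, true})
      assert((A ? true : B) == (A || B));
  return 0;
}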

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 2 deletions
@@ -1422,8 +1422,8 @@ void SITargetLowering::CollectTargetIntrinsicOperands(
     // The DAG's ValueType loses the addrspaces.
     // Add them as 2 extra Constant operands "from" and "to".
     unsigned SrcAS =
-        I.getOperand(0)->getType()->getScalarType()->getPointerAddressSpace();
-    unsigned DstAS = I.getType()->getScalarType()->getPointerAddressSpace();
+        I.getOperand(0)->getType()->getPointerAddressSpace();
+    unsigned DstAS = I.getType()->getPointerAddressSpace();
     Ops.push_back(DAG.getTargetConstant(SrcAS, SDLoc(), MVT::i32));
     Ops.push_back(DAG.getTargetConstant(DstAS, SDLoc(), MVT::i32));
     break;
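Dropping `getScalarType()` matches the CodeGenPrepare guard above: the intrinsic is no longer formed for vector casts, so its operand and result types are plain pointers whose address space can be read directly. As a sketch of the other side of this encoding, a consumer could recover the two address spaces from the trailing constant operands; the helper below is hypothetical, not the commit's code:

#include <utility>

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Hypothetical decode helper: MVT erases pointer address spaces, so the
// "from" and "to" address spaces appended above travel as the last two
// i32 target-constant operands of the intrinsic node.
static std::pair<unsigned, unsigned> getCastAddrSpaces(const SDNode *N) {
  unsigned NumOps = N->getNumOperands();
  unsigned SrcAS = N->getConstantOperandVal(NumOps - 2); // "from"
  unsigned DstAS = N->getConstantOperandVal(NumOps - 1); // "to"
  return {SrcAS, DstAS};
}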
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mtriple=amdgcn-- -amdgpu-codegenprepare -S < %s | FileCheck -check-prefix=OPT %s
+
+; Check that CGP doesn't try to create an amdgcn.addrspacecast.nonnull of a vector, as that's not supported.
+
+define <4 x ptr> @vec_of_local_to_flat_nonnull_arg() {
+; OPT-LABEL: define <4 x ptr> @vec_of_local_to_flat_nonnull_arg() {
+; OPT-NEXT:    [[X:%.*]] = addrspacecast <4 x ptr addrspace(3)> zeroinitializer to <4 x ptr>
+; OPT-NEXT:    ret <4 x ptr> [[X]]
+;
+  %x = addrspacecast <4 x ptr addrspace(3)> zeroinitializer to <4 x ptr>
+  ret <4 x ptr> %x
+}

llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll

Lines changed: 8 additions & 15 deletions
@@ -168,7 +168,6 @@ define void @knownbits_on_priv_to_flat(ptr addrspace(5) %ptr) {
   ret void
 }
 
-; this would recursive infinitely and we'll give up once we notice it.
 define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
 ; OPT-LABEL: define void @recursive_phis(
 ; OPT-SAME: i1 [[COND:%.*]], ptr addrspace(5) [[PTR:%.*]]) {
@@ -185,8 +184,8 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
 ; OPT-NEXT:    br label [[FINALLY]]
 ; OPT:       finally:
 ; OPT-NEXT:    [[PHI_PTR]] = phi ptr addrspace(5) [ [[KB_PTR]], [[THEN]] ], [ [[OTHER_PHI]], [[ELSE]] ]
-; OPT-NEXT:    [[X:%.*]] = addrspacecast ptr addrspace(5) [[PHI_PTR]] to ptr
-; OPT-NEXT:    store volatile i32 7, ptr [[X]], align 4
+; OPT-NEXT:    [[TMP0:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PHI_PTR]])
+; OPT-NEXT:    store volatile i32 7, ptr [[TMP0]], align 4
 ; OPT-NEXT:    br i1 [[COND]], label [[ELSE]], label [[END:%.*]]
 ; OPT:       end:
 ; OPT-NEXT:    ret void
@@ -202,19 +201,16 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
 ; DAGISEL-ASM-NEXT:    v_and_b32_e32 v0, 0xffff, v1
 ; DAGISEL-ASM-NEXT:  ; %bb.2: ; %finallyendcf.split
 ; DAGISEL-ASM-NEXT:    s_or_b64 exec, exec, s[4:5]
-; DAGISEL-ASM-NEXT:    s_mov_b64 s[8:9], src_private_base
 ; DAGISEL-ASM-NEXT:    s_xor_b64 s[6:7], vcc, -1
 ; DAGISEL-ASM-NEXT:    s_mov_b64 s[4:5], 0
-; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v1, s9
+; DAGISEL-ASM-NEXT:    s_mov_b64 s[8:9], src_private_base
 ; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v2, 7
 ; DAGISEL-ASM-NEXT:  .LBB7_3: ; %finally
 ; DAGISEL-ASM-NEXT:    ; =>This Inner Loop Header: Depth=1
-; DAGISEL-ASM-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
-; DAGISEL-ASM-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
-; DAGISEL-ASM-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
-; DAGISEL-ASM-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
-; DAGISEL-ASM-NEXT:    v_cndmask_b32_e32 v3, 0, v0, vcc
-; DAGISEL-ASM-NEXT:    flat_store_dword v[3:4], v2
+; DAGISEL-ASM-NEXT:    s_and_b64 s[10:11], exec, s[6:7]
+; DAGISEL-ASM-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
+; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v1, s9
+; DAGISEL-ASM-NEXT:    flat_store_dword v[0:1], v2
 ; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0)
 ; DAGISEL-ASM-NEXT:    s_andn2_b64 exec, exec, s[4:5]
 ; DAGISEL-ASM-NEXT:    s_cbranch_execnz .LBB7_3
@@ -242,11 +238,8 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
 ; GISEL-ASM-NEXT:  .LBB7_3: ; %finally
 ; GISEL-ASM-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GISEL-ASM-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
-; GISEL-ASM-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
 ; GISEL-ASM-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
-; GISEL-ASM-NEXT:    v_cndmask_b32_e32 v3, 0, v0, vcc
-; GISEL-ASM-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
-; GISEL-ASM-NEXT:    flat_store_dword v[3:4], v2
+; GISEL-ASM-NEXT:    flat_store_dword v[0:1], v2
 ; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-ASM-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GISEL-ASM-NEXT:    s_cbranch_execnz .LBB7_3
Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM
+
+define void @local_to_flat(ptr addrspace(3) %ptr) {
+; ASM-LABEL: local_to_flat:
+; ASM:       ; %bb.0:
+; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT:    s_mov_b64 s[4:5], src_shared_base
+; ASM-NEXT:    v_mov_b32_e32 v1, s5
+; ASM-NEXT:    v_mov_b32_e32 v2, 7
+; ASM-NEXT:    flat_store_dword v[0:1], v2
+; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT:    s_setpc_b64 s[30:31]
+  %1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
+  store volatile i32 7, ptr %1, align 4
+  ret void
+}
+
+define void @private_to_flat(ptr addrspace(5) %ptr) {
+; ASM-LABEL: private_to_flat:
+; ASM:       ; %bb.0:
+; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
+; ASM-NEXT:    v_mov_b32_e32 v1, s5
+; ASM-NEXT:    v_mov_b32_e32 v2, 7
+; ASM-NEXT:    flat_store_dword v[0:1], v2
+; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT:    s_setpc_b64 s[30:31]
+  %1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) %ptr)
+  store volatile i32 7, ptr %1, align 4
+  ret void
+}
+
+define void @flat_to_local(ptr %ptr) {
+; ASM-LABEL: flat_to_local:
+; ASM:       ; %bb.0:
+; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT:    v_mov_b32_e32 v1, 7
+; ASM-NEXT:    ds_write_b32 v0, v1
+; ASM-NEXT:    s_waitcnt lgkmcnt(0)
+; ASM-NEXT:    s_setpc_b64 s[30:31]
+  %1 = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr %ptr)
+  store volatile i32 7, ptr addrspace(3) %1, align 4
+  ret void
+}
+
+define void @flat_to_private(ptr %ptr) {
+; ASM-LABEL: flat_to_private:
+; ASM:       ; %bb.0:
+; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT:    v_mov_b32_e32 v1, 7
+; ASM-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; ASM-NEXT:    s_waitcnt vmcnt(0)
+; ASM-NEXT:    s_setpc_b64 s[30:31]
+  %1 = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr %ptr)
+  store volatile i32 7, ptr addrspace(5) %1, align 4
+  ret void
+}
+
+declare ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3))
+declare ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5))
+declare ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr)
+declare ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr)
+
+declare <4 x ptr> @llvm.amdgcn.addrspacecast.nonnull.v4p0.v4p3(<4 x ptr addrspace(3)>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; DAGISEL-ASM: {{.*}}
+; GISEL-ASM: {{.*}}
