Skip to content

Commit 62942e8

Browse files
committed
[AMDGPU] Make getAssumedAddrSpace return AS1 for pointer kernel arguments
1 parent b553022 commit 62942e8

14 files changed

+316
-334
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -977,6 +977,10 @@ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
977977
}
978978

979979
unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
980+
if (auto *Arg = dyn_cast<Argument>(V);
981+
Arg && AMDGPU::isKernelCC(Arg->getParent()) && !Arg->hasByRefAttr())
982+
return AMDGPUAS::GLOBAL_ADDRESS;
983+
980984
const auto *LD = dyn_cast<LoadInst>(V);
981985
if (!LD) // TODO: Handle invariant load like constant.
982986
return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -12592,29 +12592,18 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1259212592
}
1259312593

1259412594
ChangeStatus updateImpl(Attributor &A) override {
12595-
unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
1259612595
uint32_t OldAddressSpace = AssumedAddressSpace;
1259712596

1259812597
auto CheckAddressSpace = [&](Value &Obj) {
1259912598
if (isa<UndefValue>(&Obj))
1260012599
return true;
12601-
// If an argument in flat address space only has addrspace cast uses, and
12602-
// those casts are same, then we take the dst addrspace.
1260312600
if (auto *Arg = dyn_cast<Argument>(&Obj)) {
12604-
if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
12605-
unsigned CastAddrSpace = FlatAS;
12606-
for (auto *U : Arg->users()) {
12607-
auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
12608-
if (!ASCI)
12609-
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
12610-
if (CastAddrSpace != FlatAS &&
12611-
CastAddrSpace != ASCI->getDestAddressSpace())
12612-
return false;
12613-
CastAddrSpace = ASCI->getDestAddressSpace();
12614-
}
12615-
if (CastAddrSpace != FlatAS)
12616-
return takeAddressSpace(CastAddrSpace);
12617-
}
12601+
auto *TTI =
12602+
A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
12603+
*Arg->getParent());
12604+
unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
12605+
if (AssumedAS != ~0U)
12606+
return takeAddressSpace(AssumedAS);
1261812607
}
1261912608
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
1262012609
};

llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,30 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942
33

4-
define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
4+
define void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
55
; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat:
66
; GFX942: ; %bb.0:
7-
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
7+
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88
; GFX942-NEXT: v_mov_b32_e32 v2, 4.0
9-
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
10-
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
119
; GFX942-NEXT: buffer_wbl2 sc0 sc1
1210
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1
1311
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1412
; GFX942-NEXT: buffer_inv sc0 sc1
15-
; GFX942-NEXT: s_endpgm
13+
; GFX942-NEXT: s_setpc_b64 s[30:31]
1614
%ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
1715
ret void
1816
}
1917

20-
define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
18+
define void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
2119
; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
2220
; GFX942: ; %bb.0:
23-
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
21+
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2422
; GFX942-NEXT: v_mov_b32_e32 v2, 4.0
25-
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
26-
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
2723
; GFX942-NEXT: buffer_wbl2 sc0 sc1
2824
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1
2925
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3026
; GFX942-NEXT: buffer_inv sc0 sc1
31-
; GFX942-NEXT: s_endpgm
27+
; GFX942-NEXT: s_setpc_b64 s[30:31]
3228
%ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
3329
ret void
3430
}

llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

Lines changed: 22 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1334,92 +1334,82 @@ main_body:
13341334
ret void
13351335
}
13361336

1337-
define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
1337+
define void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
13381338
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat:
13391339
; GFX90A: ; %bb.0: ; %main_body
1340-
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1340+
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13411341
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
13421342
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1343-
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1344-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
13451343
; GFX90A-NEXT: buffer_wbl2
13461344
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
13471345
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
13481346
; GFX90A-NEXT: buffer_invl2
13491347
; GFX90A-NEXT: buffer_wbinvl1_vol
1350-
; GFX90A-NEXT: s_endpgm
1348+
; GFX90A-NEXT: s_setpc_b64 s[30:31]
13511349
;
13521350
; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat:
13531351
; GFX942: ; %bb.0: ; %main_body
1354-
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1352+
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13551353
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
1356-
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1357-
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
13581354
; GFX942-NEXT: buffer_wbl2 sc0 sc1
13591355
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1
13601356
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
13611357
; GFX942-NEXT: buffer_inv sc0 sc1
1362-
; GFX942-NEXT: s_endpgm
1358+
; GFX942-NEXT: s_setpc_b64 s[30:31]
13631359
main_body:
13641360
%ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
13651361
ret void
13661362
}
13671363

1368-
define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
1364+
define void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
13691365
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
13701366
; GFX90A: ; %bb.0: ; %main_body
1371-
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1367+
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13721368
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
13731369
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1374-
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1375-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
13761370
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
13771371
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
13781372
; GFX90A-NEXT: buffer_wbinvl1_vol
1379-
; GFX90A-NEXT: s_endpgm
1373+
; GFX90A-NEXT: s_setpc_b64 s[30:31]
13801374
;
13811375
; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
13821376
; GFX942: ; %bb.0: ; %main_body
1383-
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1377+
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13841378
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
1385-
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1386-
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
13871379
; GFX942-NEXT: buffer_wbl2 sc1
13881380
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
13891381
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
13901382
; GFX942-NEXT: buffer_inv sc1
1391-
; GFX942-NEXT: s_endpgm
1383+
; GFX942-NEXT: s_setpc_b64 s[30:31]
13921384
main_body:
13931385
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
13941386
ret void
13951387
}
13961388

1397-
define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
1389+
define void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
13981390
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system:
13991391
; GFX90A: ; %bb.0: ; %main_body
1400-
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1392+
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14011393
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
14021394
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1403-
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1404-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
14051395
; GFX90A-NEXT: buffer_wbl2
14061396
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
14071397
; GFX90A-NEXT: s_waitcnt vmcnt(0)
14081398
; GFX90A-NEXT: buffer_invl2
14091399
; GFX90A-NEXT: buffer_wbinvl1_vol
1410-
; GFX90A-NEXT: s_endpgm
1400+
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1401+
; GFX90A-NEXT: s_setpc_b64 s[30:31]
14111402
;
14121403
; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_system:
14131404
; GFX942: ; %bb.0: ; %main_body
1414-
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1405+
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14151406
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
1416-
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1417-
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
14181407
; GFX942-NEXT: buffer_wbl2 sc0 sc1
14191408
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1
14201409
; GFX942-NEXT: s_waitcnt vmcnt(0)
14211410
; GFX942-NEXT: buffer_inv sc0 sc1
1422-
; GFX942-NEXT: s_endpgm
1411+
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1412+
; GFX942-NEXT: s_setpc_b64 s[30:31]
14231413
main_body:
14241414
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
14251415
ret void
@@ -1506,30 +1496,26 @@ main_body:
15061496
ret double %ret
15071497
}
15081498

1509-
define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
1499+
define void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
15101500
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
15111501
; GFX90A: ; %bb.0: ; %main_body
1512-
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1502+
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15131503
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
15141504
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1515-
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1516-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
15171505
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
15181506
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
15191507
; GFX90A-NEXT: buffer_wbinvl1_vol
1520-
; GFX90A-NEXT: s_endpgm
1508+
; GFX90A-NEXT: s_setpc_b64 s[30:31]
15211509
;
15221510
; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
15231511
; GFX942: ; %bb.0: ; %main_body
1524-
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1512+
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15251513
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
1526-
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1527-
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
15281514
; GFX942-NEXT: buffer_wbl2 sc1
15291515
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
15301516
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
15311517
; GFX942-NEXT: buffer_inv sc1
1532-
; GFX942-NEXT: s_endpgm
1518+
; GFX942-NEXT: s_setpc_b64 s[30:31]
15331519
main_body:
15341520
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
15351521
ret void

0 commit comments

Comments
 (0)