
Commit 137f785

[AMDGPU] Set MaxAtomicSizeInBitsSupported. (#75185)
This will result in larger atomic operations getting expanded to `__atomic_*` libcalls via AtomicExpandPass, which matches what Clang already does in the frontend. While AMDGPU currently disables the use of all libcalls, I've changed it to instead disable all of them _except_ the atomic ones. Those are already emitted by the Clang frontend, and enabling them in the backend allows the same behavior there.
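
For illustration, here is a minimal IR sketch of the new behavior (the function name @copy_i128 is made up; the pattern mirrors the CodeGen test added in this commit): a 128-bit atomic exceeds the 64-bit limit set here, so AtomicExpandPass now lowers it to the `__atomic_load_16`/`__atomic_store_16` libcalls instead of failing in the backend.

```llvm
; Input IR: an oversized (128-bit) atomic load/store pair on amdgcn.
; After AtomicExpandPass, both operations become calls to the
; __atomic_load_16 / __atomic_store_16 runtime functions.
define void @copy_i128(ptr %a) {
  %v = load atomic i128, ptr %a seq_cst, align 16
  store atomic i128 %v, ptr %a seq_cst, align 16
  ret void
}
```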
1 parent 83680f8 commit 137f785

3 files changed: +28 -16 lines

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 7 additions & 3 deletions

@@ -506,9 +506,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
   AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);

-  // There are no libcalls of any kind.
-  for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
-    setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
+  // Disable most libcalls.
+  for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
+    if (I < RTLIB::ATOMIC_LOAD || I > RTLIB::ATOMIC_FETCH_NAND_16)
+      setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
+  }

   setSchedulingPreference(Sched::RegPressure);
   setJumpIsExpensive(true);
@@ -556,6 +558,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
                        ISD::FSUB, ISD::FNEG,
                        ISD::FABS, ISD::AssertZext,
                        ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN});
+
+  setMaxAtomicSizeInBitsSupported(64);
 }

 bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {

Lines changed: 10 additions & 0 deletions

@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+
+define void @test(ptr %a) nounwind {
+; CHECK-LABEL: test:
+; CHECK: __atomic_load_16
+; CHECK: __atomic_store_16
+  %1 = load atomic i128, ptr %a seq_cst, align 16
+  store atomic i128 %1, ptr %a seq_cst, align 16
+  ret void
+}

llvm/test/Transforms/AtomicExpand/AMDGPU/unaligned-atomic.ll

Lines changed: 11 additions & 13 deletions

@@ -1,15 +1,13 @@
-; RUN: not --crash opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -atomic-expand %s 2>&1 | FileCheck %s
-; The AtomicExpand pass cannot handle missing libcalls (yet) so reports a fatal error.
-; CHECK: LLVM ERROR: expandAtomicOpToLibcall shouldn't fail for Load
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -atomic-expand %s 2>&1 | FileCheck --check-prefix=GCN %s

 define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
 ; GCN-LABEL: @atomic_load_global_align1(
 ; GCN-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4
-; GCN-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP3]])
-; GCN-NEXT: call void @0(i64 4, ptr [[TMP2]], ptr [[TMP3]], i32 5)
-; GCN-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
-; GCN-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP3]])
+; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+; GCN-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP3]])
+; GCN-NEXT: call void @__atomic_load(i64 4, ptr [[TMP2]], ptr addrspace(5) [[TMP3]], i32 5)
+; GCN-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; GCN-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP3]])
 ; GCN-NEXT: ret i32 [[TMP5]]
 ;
   %val = load atomic i32, ptr addrspace(1) %ptr seq_cst, align 1
@@ -19,11 +17,11 @@ define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
 define void @atomic_store_global_align1(ptr addrspace(1) %ptr, i32 %val) {
 ; GCN-LABEL: @atomic_store_global_align1(
 ; GCN-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
-; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4
-; GCN-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP3]])
-; GCN-NEXT: store i32 [[VAL:%.*]], ptr [[TMP3]], align 4
-; GCN-NEXT: call void @1(i64 4, ptr [[TMP2]], ptr [[TMP3]], i32 0)
-; GCN-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP3]])
+; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+; GCN-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP3]])
+; GCN-NEXT: store i32 [[VAL:%.*]], ptr addrspace(5) [[TMP3]], align 4
+; GCN-NEXT: call void @__atomic_store(i64 4, ptr [[TMP2]], ptr addrspace(5) [[TMP3]], i32 0)
+; GCN-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP3]])
 ; GCN-NEXT: ret void
 ;
   store atomic i32 %val, ptr addrspace(1) %ptr monotonic, align 1
