Skip to content

Commit 24ed04b

Browse files
committed
[AMDGPU] Handle nontemporal and amdgpu.last.use metadata in amdgpu-lower-buffer-fat-pointers
1 parent f9c8c01 commit 24ed04b

File tree

3 files changed

+392
-5
lines changed

3 files changed

+392
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,6 +1088,10 @@ Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
10881088
Aux |= (Aux & AMDGPU::CPol::GLC ? AMDGPU::CPol::DLC : 0);
10891089
if (IsVolatile)
10901090
Aux |= AMDGPU::CPol::VOLATILE;
1091+
if (I->hasMetadata("amdgpu.last.use"))
1092+
Aux |= AMDGPU::CPol::TH_LU;
1093+
if (I->hasMetadata("nontemporal"))
1094+
Aux |= AMDGPU::CPol::TH_NT;
10911095
Args.push_back(IRB.getInt32(Aux));
10921096

10931097
Intrinsic::ID IID = Intrinsic::not_intrinsic;

llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memops.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ define void @loads(ptr addrspace(8) %buf) {
1111
; CHECK-NEXT: [[SCALAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
1212
; CHECK-NEXT: [[VEC2:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0)
1313
; CHECK-NEXT: [[VEC4:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
14-
; CHECK-NEXT: [[NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 2), !nontemporal [[META0:![0-9]+]]
14+
; CHECK-NEXT: [[NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 3), !nontemporal [[META0:![0-9]+]]
1515
; CHECK-NEXT: [[INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1:![0-9]+]]
16-
; CHECK-NEXT: [[NONTEMPORAL_INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1]], !nontemporal [[META0]]
16+
; CHECK-NEXT: [[NONTEMPORAL_INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1), !invariant.load [[META1]], !nontemporal [[META0]]
1717
; CHECK-NEXT: [[VOLATILE:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
18-
; CHECK-NEXT: [[VOLATILE_NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483646), !nontemporal [[META0]]
18+
; CHECK-NEXT: [[VOLATILE_NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483645), !nontemporal [[META0]]
1919
; CHECK-NEXT: fence syncscope("wavefront") release
2020
; CHECK-NEXT: [[ATOMIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483647)
2121
; CHECK-NEXT: fence syncscope("wavefront") acquire
@@ -50,9 +50,9 @@ define void @stores(ptr addrspace(8) %buf, float %f, <4 x float> %f4) {
5050
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], <4 x float> [[F4:%.*]]) #[[ATTR0]] {
5151
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
5252
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[F4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
53-
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 2), !nontemporal [[META0]]
53+
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 3), !nontemporal [[META0]]
5454
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
55-
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483646), !nontemporal [[META0]]
55+
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483645), !nontemporal [[META0]]
5656
; CHECK-NEXT: fence syncscope("wavefront") release
5757
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483647)
5858
; CHECK-NEXT: fence syncscope("wavefront") acquire

0 commit comments

Comments
 (0)