-
Notifications
You must be signed in to change notification settings - Fork 14k
Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (#110714)" v3 #114443
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Krzysztof Drewniak (krzysz00) Changes: This reverts commit 8a849a2. It seems I missed a spot when trying to ensure the code in the instruction selection tests was actually legalized MIR. Patch is 342.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114443.diff 12 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index d7126132356d2c..ffff34417232bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
return false;
const unsigned Size = Ty.getSizeInBits();
+ if (Ty.isPointerVector())
+ return true;
if (Size <= 64)
return false;
// Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
if (!Ty.isVector())
return true;
- if (Ty.isPointerVector())
- return true;
-
unsigned EltSize = Ty.getScalarSizeInBits();
return EltSize != 32 && EltSize != 64;
}
@@ -5820,8 +5819,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
return Reg;
}
-Register AMDGPULegalizerInfo::fixStoreSourceType(
- MachineIRBuilder &B, Register VData, bool IsFormat) const {
+Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
+ Register VData, LLT MemTy,
+ bool IsFormat) const {
MachineRegisterInfo *MRI = B.getMRI();
LLT Ty = MRI->getType(VData);
@@ -5831,6 +5831,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
if (hasBufferRsrcWorkaround(Ty))
return castBufferRsrcToV4I32(VData, B);
+ if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
+ Ty = getBitcastRegisterType(Ty);
+ VData = B.buildBitcast(Ty, VData).getReg(0);
+ }
// Fixup illegal register types for i8 stores.
if (Ty == LLT::scalar(8) || Ty == S16) {
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -5848,22 +5852,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
}
bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
+ LegalizerHelper &Helper,
bool IsTyped,
bool IsFormat) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+
Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const LLT S32 = LLT::scalar(32);
- VData = fixStoreSourceType(B, VData, IsFormat);
- castBufferRsrcArgToV4I32(MI, B, 2);
- Register RSrc = MI.getOperand(2).getReg();
-
MachineMemOperand *MMO = *MI.memoperands_begin();
const int MemSize = MMO->getSize().getValue();
+ LLT MemTy = MMO->getMemoryType();
+
+ VData = fixStoreSourceType(B, VData, MemTy, IsFormat);
+
+ castBufferRsrcArgToV4I32(MI, B, 2);
+ Register RSrc = MI.getOperand(2).getReg();
unsigned ImmOffset;
@@ -5956,10 +5964,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
}
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
+ LegalizerHelper &Helper,
bool IsFormat,
bool IsTyped) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+ GISelChangeObserver &Observer = Helper.Observer;
+
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
MachineMemOperand *MMO = *MI.memoperands_begin();
const LLT MemTy = MMO->getMemoryType();
@@ -6008,9 +6019,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
// logic doesn't have to handle that case.
if (hasBufferRsrcWorkaround(Ty)) {
+ Observer.changingInstr(MI);
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
+ Observer.changedInstr(MI);
Dst = MI.getOperand(0).getReg();
+ B.setInsertPt(B.getMBB(), MI);
}
+ if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
+ Ty = getBitcastRegisterType(Ty);
+ Observer.changingInstr(MI);
+ Helper.bitcastDst(MI, Ty, 0);
+ Observer.changedInstr(MI);
+ Dst = MI.getOperand(0).getReg();
+ B.setInsertPt(B.getMBB(), MI);
+ }
+
LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const bool Unpacked = ST.hasUnpackedD16VMem();
@@ -7390,17 +7413,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_ptr_buffer_store:
- return legalizeBufferStore(MI, MRI, B, false, false);
+ return legalizeBufferStore(MI, Helper, false, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
- return legalizeBufferStore(MI, MRI, B, false, true);
+ return legalizeBufferStore(MI, Helper, false, true);
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store:
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
- return legalizeBufferStore(MI, MRI, B, true, true);
+ return legalizeBufferStore(MI, Helper, true, true);
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7409,17 +7432,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_atomic_buffer_load:
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
- return legalizeBufferLoad(MI, MRI, B, false, false);
+ return legalizeBufferLoad(MI, Helper, false, false);
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
- return legalizeBufferLoad(MI, MRI, B, true, false);
+ return legalizeBufferLoad(MI, Helper, true, false);
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
- return legalizeBufferLoad(MI, MRI, B, true, true);
+ return legalizeBufferLoad(MI, Helper, true, true);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 84470dc75b60ef..86c15197805d23 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg, bool ImageStore = false) const;
- Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
+ Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
bool IsFormat) const;
- bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, bool IsTyped,
- bool IsFormat) const;
- bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, bool IsFormat,
- bool IsTyped) const;
+ bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
+ bool IsTyped, bool IsFormat) const;
+ bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
+ bool IsFormat, bool IsTyped) const;
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
Intrinsic::ID IID) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 3556f6a95b521e..852430129251c6 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -585,7 +585,7 @@ class RegisterTypes<list<ValueType> reg_types> {
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
-def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
+def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll
new file mode 100644
index 00000000000000..091c9f143ce7ee
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll
@@ -0,0 +1,301 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck --check-prefix=GFX9 %s
+
+define ptr @buffer_load_p0(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_p0
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
+ ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %ret = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret ptr %ret
+}
+
+define void @buffer_store_p0(ptr %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_p0
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define ptr addrspace(1) @buffer_load_p1(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_p1
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
+ ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %ret = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret ptr addrspace(1) %ret
+}
+
+define void @buffer_store_p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_p1
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define ptr addrspace(4) @buffer_load_p4(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_p4
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
+ ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %ret = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret ptr addrspace(4) %ret
+}
+
+define void @buffer_store_p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_p4
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define ptr addrspace(5) @buffer_load_p5(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_p5
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0
+ %ret = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret ptr addrspace(5) %ret
+}
+
+define void @buffer_store_p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_p5
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define <2 x ptr addrspace(1)> @buffer_load_v2p1(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_v2p1
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_...
[truncated]
|
arsenm
approved these changes
Oct 31, 2024
98567be
to
e154479
Compare
…er.*.pN (llvm#110714)" v3 This reverts commit 8a849a2. It seems I missed a spot when trying to ensure the code in the instruction selection tests were actually legalized MIR.
e154479
to
e7f3559
Compare
smallp-o-p
pushed a commit
to smallp-o-p/llvm-project
that referenced
this pull request
Nov 3, 2024
…er.*.pN (llvm#110714)" v3 (llvm#114443) This reverts commit 8a849a2. It seems I missed a spot when trying to ensure the code in the instruction selection tests were actually legalized MIR.
NoumanAmir657
pushed a commit
to NoumanAmir657/llvm-project
that referenced
this pull request
Nov 4, 2024
…er.*.pN (llvm#110714)" v3 (llvm#114443) This reverts commit 8a849a2. It seems I missed a spot when trying to ensure the code in the instruction selection tests were actually legalized MIR.
searlmc1
pushed a commit
to ROCm/llvm-project
that referenced
this pull request
Mar 24, 2025
…er.*.pN (llvm#110714)" v3 (llvm#114443) This reverts commit 8a849a2. It seems I missed a spot when trying to ensure the code in the instruction selection tests were actually legalized MIR.
jrbyrnes
pushed a commit
to jrbyrnes/llvm-project
that referenced
this pull request
May 27, 2025
…er.*.pN (llvm#110714)" v3 (llvm#114443) This reverts commit 8a849a2. It seems I missed a spot when trying to ensure the code in the instruction selection tests were actually legalized MIR.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This reverts commit 8a849a2.
It seems I missed a spot when trying to ensure the code in the instruction selection tests was actually legalized MIR.