[AMDGPU][AMDGPURegBankInfo] Map S_BUFFER_LOAD_XXX to its corresponding BUFFER_LOAD_XXX #117574
@llvm/pr-subscribers-backend-amdgpu

Author: Juan Manuel Martinez Caamaño (jmmartinez)

Changes

In one test, code generation diverged between GISel and DAG. For example, this intrinsic

%ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)

would be lowered into these two cases:

buffer_load_u8 v2, v2, s[0:3], null offen
buffer_load_b32 v2, v2, s[0:3], null offen
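For reference, a minimal standalone reproducer for the u8 case, adapted from the test touched by this patch (the shortened function name and the store/return tail are reconstructed from the checked assembly, so treat this as a sketch rather than the exact test function):

declare i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32>, i32, i32)

define amdgpu_ps void @s_buffer_load_ubyte_divergent(<4 x i32> inreg %src, ptr addrspace(1) %out, i32 %offset) {
main_body:
  ; %offset lives in a VGPR (divergent), so RegBankSelect has to rewrite the
  ; scalar buffer load into a vector buffer load.
  %ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
  %zext = zext i8 %ld to i32
  store i32 %zext, ptr addrspace(1) %out
  ret void
}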
This patch fixes this issue.

Full diff: https://github.com/llvm/llvm-project/pull/117574.diff

2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 8c050348f753bb..d3d4062c34f88c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1406,16 +1406,37 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
if (i != 0)
BaseMMO = MF.getMachineMemOperand(BaseMMO, MMOOffset + 16 * i, MemSize);
- B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
- .addDef(LoadParts[i]) // vdata
- .addUse(RSrc) // rsrc
- .addUse(VIndex) // vindex
- .addUse(VOffset) // voffset
- .addUse(SOffset) // soffset
- .addImm(ImmOffset + 16 * i) // offset(imm)
- .addImm(0) // cachepolicy, swizzled buffer(imm)
- .addImm(0) // idxen(imm)
- .addMemOperand(MMO);
+ unsigned Opc;
+ switch (MI.getOpcode()) {
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD;
+ break;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
+ break;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
+ break;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
+ break;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+
+ B.buildInstr(Opc)
+ .addDef(LoadParts[i]) // vdata
+ .addUse(RSrc) // rsrc
+ .addUse(VIndex) // vindex
+ .addUse(VOffset) // voffset
+ .addUse(SOffset) // soffset
+ .addImm(ImmOffset + 16 * i) // offset(imm)
+ .addImm(0) // cachepolicy, swizzled buffer(imm)
+ .addImm(0) // idxen(imm)
+ .addMemOperand(MMO);
}
// TODO: If only the resource is a VGPR, it may be better to execute the
diff --git a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
index 020c9dc130bb2a..61ae9639c52d00 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
@@ -465,19 +465,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_byte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %src, i32 %offset, i32 0)
%sext = sext i8 %ld to i32
@@ -538,20 +531,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_ubyte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
%zext = zext i8 %ld to i32
@@ -606,19 +591,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_short_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> %src, i32 %offset, i32 0)
%sext = sext i16 %ld to i32
@@ -679,20 +657,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_ushort_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0)
%zext = zext i16 %ld to i32
[AMDGPU][AMDGPURegBankInfo] Map S_BUFFER_LOAD_XXX to its corresponding BUFFER_LOAD_XXX

In some tests, code generation diverged between GlobalISel and SelectionDAG. For example, this intrinsic

call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0)

would be lowered into these two cases:
* buffer_load_u16 v2, v2, s[0:3], null offen
* buffer_load_b32 v2, v2, s[0:3], null offen

This patch fixes this issue.
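For the u16 case above, a corresponding reproducer (again a sketch adapted from gfx12_scalar_subword_loads.ll, with a shortened function name and a store/return tail reconstructed from the checked assembly) would look roughly like:

declare i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32>, i32, i32)

define amdgpu_ps void @s_buffer_load_ushort_divergent(<4 x i32> inreg %src, ptr addrspace(1) %out, i32 %offset) {
main_body:
  ; With a divergent %offset, SelectionDAG selects buffer_load_u16, while
  ; GlobalISel previously selected buffer_load_b32 followed by a 0xffff mask.
  %ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0)
  %zext = zext i16 %ld to i32
  store i32 %zext, ptr addrspace(1) %out
  ret void
}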
Thanks!
This is a bug fix, so maybe it should be backported to 19.1.x. But I guess no one is using GlobalISel for real, so maybe it does not matter.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/9870

Here is the relevant piece of the build log for reference: