Skip to content

Commit 44edfca

Browse files
committed
[AMDGPU][AMDGPURegBankInfo] Map S_BUFFER_LOAD_XXX to its corresponding BUFFER_LOAD_XXX
In some tests, code generation diverged between GlobalISel and SelectionDAG. For example, the intrinsic call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0) would be lowered in two different ways: * buffer_load_u16 v2, v2, s[0:3], null offen (SelectionDAG) * buffer_load_b32 v2, v2, s[0:3], null offen (GlobalISel) This patch fixes the issue.
1 parent 7800d59 commit 44edfca

File tree

2 files changed

+55
-64
lines changed

2 files changed

+55
-64
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,16 +1406,37 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
14061406
if (i != 0)
14071407
BaseMMO = MF.getMachineMemOperand(BaseMMO, MMOOffset + 16 * i, MemSize);
14081408

1409-
B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
1410-
.addDef(LoadParts[i]) // vdata
1411-
.addUse(RSrc) // rsrc
1412-
.addUse(VIndex) // vindex
1413-
.addUse(VOffset) // voffset
1414-
.addUse(SOffset) // soffset
1415-
.addImm(ImmOffset + 16 * i) // offset(imm)
1416-
.addImm(0) // cachepolicy, swizzled buffer(imm)
1417-
.addImm(0) // idxen(imm)
1418-
.addMemOperand(MMO);
1409+
unsigned Opc;
1410+
switch (MI.getOpcode()) {
1411+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
1412+
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD;
1413+
break;
1414+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
1415+
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
1416+
break;
1417+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
1418+
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
1419+
break;
1420+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
1421+
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
1422+
break;
1423+
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:
1424+
Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
1425+
break;
1426+
default:
1427+
llvm_unreachable("Unexpected opcode");
1428+
}
1429+
1430+
B.buildInstr(Opc)
1431+
.addDef(LoadParts[i]) // vdata
1432+
.addUse(RSrc) // rsrc
1433+
.addUse(VIndex) // vindex
1434+
.addUse(VOffset) // voffset
1435+
.addUse(SOffset) // soffset
1436+
.addImm(ImmOffset + 16 * i) // offset(imm)
1437+
.addImm(0) // cachepolicy, swizzled buffer(imm)
1438+
.addImm(0) // idxen(imm)
1439+
.addMemOperand(MMO);
14191440
}
14201441

14211442
// TODO: If only the resource is a VGPR, it may be better to execute the

llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll

Lines changed: 24 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -465,19 +465,12 @@ main_body:
465465
}
466466

467467
define amdgpu_ps void @s_buffer_load_byte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
468-
; DAG-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
469-
; DAG: ; %bb.0: ; %main_body
470-
; DAG-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
471-
; DAG-NEXT: s_wait_loadcnt 0x0
472-
; DAG-NEXT: global_store_b32 v[0:1], v2, off
473-
; DAG-NEXT: s_endpgm
474-
;
475-
; GISEL-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
476-
; GISEL: ; %bb.0: ; %main_body
477-
; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
478-
; GISEL-NEXT: s_wait_loadcnt 0x0
479-
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
480-
; GISEL-NEXT: s_endpgm
468+
; GCN-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
469+
; GCN: ; %bb.0: ; %main_body
470+
; GCN-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
471+
; GCN-NEXT: s_wait_loadcnt 0x0
472+
; GCN-NEXT: global_store_b32 v[0:1], v2, off
473+
; GCN-NEXT: s_endpgm
481474
main_body:
482475
%ld = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %src, i32 %offset, i32 0)
483476
%sext = sext i8 %ld to i32
@@ -538,20 +531,12 @@ main_body:
538531
}
539532

540533
define amdgpu_ps void @s_buffer_load_ubyte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
541-
; DAG-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
542-
; DAG: ; %bb.0: ; %main_body
543-
; DAG-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
544-
; DAG-NEXT: s_wait_loadcnt 0x0
545-
; DAG-NEXT: global_store_b32 v[0:1], v2, off
546-
; DAG-NEXT: s_endpgm
547-
;
548-
; GISEL-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
549-
; GISEL: ; %bb.0: ; %main_body
550-
; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
551-
; GISEL-NEXT: s_wait_loadcnt 0x0
552-
; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
553-
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
554-
; GISEL-NEXT: s_endpgm
534+
; GCN-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
535+
; GCN: ; %bb.0: ; %main_body
536+
; GCN-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
537+
; GCN-NEXT: s_wait_loadcnt 0x0
538+
; GCN-NEXT: global_store_b32 v[0:1], v2, off
539+
; GCN-NEXT: s_endpgm
555540
main_body:
556541
%ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
557542
%zext = zext i8 %ld to i32
@@ -606,19 +591,12 @@ main_body:
606591
}
607592

608593
define amdgpu_ps void @s_buffer_load_short_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
609-
; DAG-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
610-
; DAG: ; %bb.0: ; %main_body
611-
; DAG-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
612-
; DAG-NEXT: s_wait_loadcnt 0x0
613-
; DAG-NEXT: global_store_b32 v[0:1], v2, off
614-
; DAG-NEXT: s_endpgm
615-
;
616-
; GISEL-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
617-
; GISEL: ; %bb.0: ; %main_body
618-
; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
619-
; GISEL-NEXT: s_wait_loadcnt 0x0
620-
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
621-
; GISEL-NEXT: s_endpgm
594+
; GCN-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
595+
; GCN: ; %bb.0: ; %main_body
596+
; GCN-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
597+
; GCN-NEXT: s_wait_loadcnt 0x0
598+
; GCN-NEXT: global_store_b32 v[0:1], v2, off
599+
; GCN-NEXT: s_endpgm
622600
main_body:
623601
%ld = call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> %src, i32 %offset, i32 0)
624602
%sext = sext i16 %ld to i32
@@ -679,20 +657,12 @@ main_body:
679657
}
680658

681659
define amdgpu_ps void @s_buffer_load_ushort_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
682-
; DAG-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
683-
; DAG: ; %bb.0: ; %main_body
684-
; DAG-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
685-
; DAG-NEXT: s_wait_loadcnt 0x0
686-
; DAG-NEXT: global_store_b32 v[0:1], v2, off
687-
; DAG-NEXT: s_endpgm
688-
;
689-
; GISEL-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
690-
; GISEL: ; %bb.0: ; %main_body
691-
; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
692-
; GISEL-NEXT: s_wait_loadcnt 0x0
693-
; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
694-
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
695-
; GISEL-NEXT: s_endpgm
660+
; GCN-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
661+
; GCN: ; %bb.0: ; %main_body
662+
; GCN-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
663+
; GCN-NEXT: s_wait_loadcnt 0x0
664+
; GCN-NEXT: global_store_b32 v[0:1], v2, off
665+
; GCN-NEXT: s_endpgm
696666
main_body:
697667
%ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0)
698668
%zext = zext i16 %ld to i32

0 commit comments

Comments
 (0)