Skip to content

Commit f79b902

Browse files
committed
Used the isXNACKEnabled subtarget option in the PatFrag to optimize the selection.
1 parent fcd298c commit f79b902

File tree

227 files changed

+34516
-33673
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

227 files changed

+34516
-33673
lines changed

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 69 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -868,103 +868,60 @@ def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
868868
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
869869

870870
class SMRDAlignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
871+
// Ignore the alignment check if XNACK support is disabled.
872+
if (!Subtarget->isXNACKEnabled())
873+
return true;
874+
871875
// Returns true if it is a naturally aligned multi-dword load.
872876
LoadSDNode *Ld = cast<LoadSDNode>(N);
873877
unsigned Size = Ld->getMemoryVT().getStoreSize();
874-
return (Size <= 4) || (Ld->getAlign().value() >= PowerOf2Ceil(Size));
878+
return Size <= 4 || Ld->getAlign().value() >= Size;
875879
}]> {
876880
let GISelPredicateCode = [{
877-
auto &Ld = cast<GLoad>(MI);
878-
TypeSize Size = Ld.getMMO().getSize().getValue();
879-
return (Size <= 4) || (Ld.getMMO().getAlign().value() >= PowerOf2Ceil(Size));
881+
if (!Subtarget->isXNACKEnabled())
882+
return true;
883+
884+
auto &Ld = cast<GLoad>(MI);
885+
TypeSize Size = Ld.getMMO().getSize().getValue();
886+
return Size <= 4 || Ld.getMMO().getAlign().value() >= Size;
880887
}];
881888
}
882889

883890
class SMRDUnalignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
891+
// Do the alignment check if XNACK support is enabled.
892+
if (!Subtarget->isXNACKEnabled())
893+
return false;
894+
884895
// Returns true if it is an under aligned multi-dword load.
885896
LoadSDNode *Ld = cast<LoadSDNode>(N);
886897
unsigned Size = Ld->getMemoryVT().getStoreSize();
887-
return (Size > 4) && (Ld->getAlign().value() < PowerOf2Ceil(Size));
898+
return Size > 4 && (Ld->getAlign().value() < Size);
888899
}]> {
889900
let GISelPredicateCode = [{
890-
auto &Ld = cast<GLoad>(MI);
891-
TypeSize Size = Ld.getMMO().getSize().getValue();
892-
return (Size > 4) && (Ld.getMMO().getAlign().value() < PowerOf2Ceil(Size));
901+
if (!Subtarget->isXNACKEnabled())
902+
return false;
903+
904+
auto &Ld = cast<GLoad>(MI);
905+
TypeSize Size = Ld.getMMO().getSize().getValue();
906+
return Size > 4 && (Ld.getMMO().getAlign().value() < Size);
893907
}];
894908
}
895909

896-
def alignedmultidwordload : SMRDAlignedLoadPat<smrd_load>;
897-
def unalignedmultidwordload : SMRDUnalignedLoadPat<smrd_load>;
898-
899-
multiclass SMRD_Align_Pattern <string Instr, ValueType vt> {
900-
901-
// 1. IMM offset
902-
def : GCNPat <
903-
(alignedmultidwordload (SMRDImm i64:$sbase, i32:$offset)),
904-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))> {
905-
let OtherPredicates = [isGFX8Plus];
906-
}
907-
def : GCNPat <
908-
(unalignedmultidwordload (SMRDImm i64:$sbase, i32:$offset)),
909-
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") $sbase, $offset, 0))> {
910-
let OtherPredicates = [isGFX8Plus];
911-
}
912-
913-
// 2. SGPR offset
914-
def : GCNPat <
915-
(alignedmultidwordload (SMRDSgpr i64:$sbase, i32:$soffset)),
916-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))> {
917-
let OtherPredicates = [isGFX8Only];
918-
}
919-
def : GCNPat <
920-
(unalignedmultidwordload (SMRDSgpr i64:$sbase, i32:$soffset)),
921-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_ec") $sbase, $soffset, 0))> {
922-
let OtherPredicates = [isGFX8Only];
923-
}
924-
def : GCNPat <
925-
(alignedmultidwordload (SMRDSgpr i64:$sbase, i32:$soffset)),
926-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
927-
let OtherPredicates = [isGFX9Plus];
928-
}
929-
def : GCNPat <
930-
(unalignedmultidwordload (SMRDSgpr i64:$sbase, i32:$soffset)),
931-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, 0, 0))> {
932-
let OtherPredicates = [isGFX9Plus];
933-
}
934-
935-
// 3. SGPR+IMM offset
936-
def : GCNPat <
937-
(alignedmultidwordload (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
938-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
939-
let OtherPredicates = [isGFX9Plus];
940-
}
941-
def : GCNPat <
942-
(unalignedmultidwordload (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
943-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, $offset, 0))> {
944-
let OtherPredicates = [isGFX9Plus];
945-
}
946-
947-
// 4. No offset
948-
def : GCNPat <
949-
(vt (alignedmultidwordload (i64 SReg_64:$sbase))),
950-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))> {
951-
let OtherPredicates = [isGFX8Plus];
952-
}
953-
def : GCNPat <
954-
(vt (unalignedmultidwordload (i64 SReg_64:$sbase))),
955-
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") i64:$sbase, 0, 0))> {
956-
let OtherPredicates = [isGFX8Plus];
957-
}
958-
}
910+
def aligned_smrd_load : SMRDAlignedLoadPat<smrd_load>;
911+
def unaligned_smrd_load : SMRDUnalignedLoadPat<smrd_load>;
959912

960913
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
961914

962915
// 1. IMM offset
963916
def : GCNPat <
964-
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
965-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))> {
966-
let OtherPredicates = [isGFX6GFX7];
967-
}
917+
(aligned_smrd_load (SMRDImm i64:$sbase, i32:$offset)),
918+
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
919+
>;
920+
if !gt(vt.Size, 32) then
921+
def : GCNPat <
922+
(unaligned_smrd_load (SMRDImm i64:$sbase, i32:$offset)),
923+
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") $sbase, $offset, 0))
924+
>;
968925

969926
// 2. 32-bit IMM offset on CI
970927
if immci then def : GCNPat <
@@ -975,19 +932,49 @@ multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
975932

976933
// 3. SGPR offset
977934
def : GCNPat <
978-
(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
935+
(aligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
979936
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))> {
980-
let OtherPredicates = [isGFX6GFX7];
937+
let OtherPredicates = [isNotGFX9Plus];
938+
}
939+
def : GCNPat <
940+
(aligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
941+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
942+
let OtherPredicates = [isGFX9Plus];
943+
}
944+
if !gt(vt.Size, 32) then {
945+
def : GCNPat <
946+
(unaligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
947+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_ec") $sbase, $soffset, 0))> {
948+
let OtherPredicates = [isNotGFX9Plus];
949+
}
950+
def : GCNPat <
951+
(unaligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
952+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, 0, 0))> {
953+
let OtherPredicates = [isGFX9Plus];
954+
}
981955
}
982956

983-
// 4. No offset
957+
// 4. SGPR+IMM offset
984958
def : GCNPat <
985-
(vt (smrd_load (i64 SReg_64:$sbase))),
986-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))> {
987-
let OtherPredicates = [isGFX6GFX7];
959+
(aligned_smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
960+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
961+
let OtherPredicates = [isGFX9Plus];
988962
}
963+
if !gt(vt.Size, 32) then
964+
def : GCNPat <
965+
(unaligned_smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
966+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, $offset, 0))> {
967+
let OtherPredicates = [isGFX9Plus];
968+
}
989969

990-
defm : SMRD_Align_Pattern<Instr, vt>;
970+
// 5. No offset
971+
def : GCNPat <
972+
(vt (aligned_smrd_load (i64 SReg_64:$sbase))),
973+
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))>;
974+
if !gt(vt.Size, 32) then
975+
def : GCNPat <
976+
(vt (unaligned_smrd_load (i64 SReg_64:$sbase))),
977+
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") i64:$sbase, 0, 0))>;
991978
}
992979

993980
multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {

llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
77
; GFX11: ; %bb.0: ; %entry
88
; GFX11-NEXT: s_clause 0x1
99
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
10-
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
10+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
1111
; GFX11-NEXT: v_mov_b32_e32 v2, 0
1212
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
13-
; GFX11-NEXT: s_add_u32 s0, s6, s2
14-
; GFX11-NEXT: s_addc_u32 s1, s7, s3
13+
; GFX11-NEXT: s_add_u32 s0, s6, s0
14+
; GFX11-NEXT: s_addc_u32 s1, s7, s1
1515
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1616
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1717
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
@@ -23,10 +23,10 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
2323
; GFX12: ; %bb.0: ; %entry
2424
; GFX12-NEXT: s_clause 0x1
2525
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
26-
; GFX12-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
26+
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
2727
; GFX12-NEXT: v_mov_b32_e32 v2, 0
2828
; GFX12-NEXT: s_wait_kmcnt 0x0
29-
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[2:3]
29+
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[0:1]
3030
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3131
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3232
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]
@@ -59,11 +59,11 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
5959
; GFX11: ; %bb.0: ; %entry
6060
; GFX11-NEXT: s_clause 0x1
6161
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
62-
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
62+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
6363
; GFX11-NEXT: v_mov_b32_e32 v2, 0
6464
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
65-
; GFX11-NEXT: s_sub_u32 s0, s6, s2
66-
; GFX11-NEXT: s_subb_u32 s1, s7, s3
65+
; GFX11-NEXT: s_sub_u32 s0, s6, s0
66+
; GFX11-NEXT: s_subb_u32 s1, s7, s1
6767
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6868
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
6969
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
@@ -75,10 +75,10 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
7575
; GFX12: ; %bb.0: ; %entry
7676
; GFX12-NEXT: s_clause 0x1
7777
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
78-
; GFX12-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
78+
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
7979
; GFX12-NEXT: v_mov_b32_e32 v2, 0
8080
; GFX12-NEXT: s_wait_kmcnt 0x0
81-
; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[2:3]
81+
; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[0:1]
8282
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
8383
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
8484
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]

llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,9 @@ define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) {
131131
;
132132
; WAVE32-LABEL: brcond_sgpr_trunc_and:
133133
; WAVE32: ; %bb.0: ; %entry
134-
; WAVE32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
134+
; WAVE32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
135135
; WAVE32-NEXT: s_waitcnt lgkmcnt(0)
136-
; WAVE32-NEXT: s_and_b32 s0, s2, s3
136+
; WAVE32-NEXT: s_and_b32 s0, s0, s1
137137
; WAVE32-NEXT: s_xor_b32 s0, s0, 1
138138
; WAVE32-NEXT: s_and_b32 s0, s0, 1
139139
; WAVE32-NEXT: s_cmp_lg_u32 s0, 0

llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,20 +1401,20 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(ptr addrspace(1) %in, ptr addr
14011401
;
14021402
; VI-LABEL: cvt_ubyte0_or_multiuse:
14031403
; VI: ; %bb.0: ; %bb
1404-
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1404+
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
14051405
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
14061406
; VI-NEXT: s_waitcnt lgkmcnt(0)
1407-
; VI-NEXT: v_mov_b32_e32 v0, s4
1408-
; VI-NEXT: v_mov_b32_e32 v1, s5
1407+
; VI-NEXT: v_mov_b32_e32 v0, s0
1408+
; VI-NEXT: v_mov_b32_e32 v1, s1
14091409
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
14101410
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
14111411
; VI-NEXT: flat_load_dword v0, v[0:1]
14121412
; VI-NEXT: s_waitcnt vmcnt(0)
14131413
; VI-NEXT: v_or_b32_e32 v0, 0x80000001, v0
14141414
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
14151415
; VI-NEXT: v_add_f32_e32 v2, v0, v1
1416-
; VI-NEXT: v_mov_b32_e32 v0, s6
1417-
; VI-NEXT: v_mov_b32_e32 v1, s7
1416+
; VI-NEXT: v_mov_b32_e32 v0, s2
1417+
; VI-NEXT: v_mov_b32_e32 v1, s3
14181418
; VI-NEXT: flat_store_dword v[0:1], v2
14191419
; VI-NEXT: s_endpgm
14201420
bb:

0 commit comments

Comments
 (0)