Skip to content

Commit 9b13b4a

Browse files
committed
AMDGPU: Prepare to use scalar register indexing
Define pseudos mirroring the VGPR indexing ones, and adjust the operands in the s_movrel* instructions to avoid the result def.
1 parent 8615eeb commit 9b13b4a

File tree

4 files changed

+76
-28
lines changed

4 files changed

+76
-28
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,8 @@ def ADJCALLSTACKDOWN : SPseudoInstSI<
513513
let Defs = [M0, EXEC, SCC],
514514
UseNamedOperandTable = 1 in {
515515

516+
// SI_INDIRECT_SRC/DST are only used by legacy SelectionDAG indirect
517+
// addressing implementation.
516518
class SI_INDIRECT_SRC<RegisterClass rc> : VPseudoInstSI <
517519
(outs VGPR_32:$vdst),
518520
(ins rc:$src, VS_32:$idx, i32imm:$offset)> {
@@ -526,7 +528,6 @@ class SI_INDIRECT_DST<RegisterClass rc> : VPseudoInstSI <
526528
let usesCustomInserter = 1;
527529
}
528530

529-
// TODO: We can support indirect SGPR access.
530531
def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
531532
def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
532533
def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
@@ -541,6 +542,65 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
541542

542543
} // End Uses = [EXEC], Defs = [M0, EXEC]
543544

545+
546+
// This is a pseudo variant of the v_movreld_b32 (or v_mov_b32
547+
// expecting to be executed with gpr indexing mode enabled)
548+
// instruction in which the vector operand appears only twice, once as
549+
// def and once as use. Using this pseudo avoids problems with the Two
550+
// Address instructions pass.
551+
class INDIRECT_REG_WRITE_pseudo<RegisterClass rc,
552+
RegisterOperand val_ty> : PseudoInstSI <
553+
(outs rc:$vdst), (ins rc:$vsrc, val_ty:$val, i32imm:$subreg)> {
554+
let Constraints = "$vsrc = $vdst";
555+
let Uses = [M0];
556+
}
557+
558+
class V_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
559+
INDIRECT_REG_WRITE_pseudo<rc, VSrc_b32> {
560+
let VALU = 1;
561+
let VOP1 = 1;
562+
let Uses = [M0, EXEC];
563+
}
564+
565+
class S_INDIRECT_REG_WRITE_pseudo<RegisterClass rc,
566+
RegisterOperand val_ty> :
567+
INDIRECT_REG_WRITE_pseudo<rc, val_ty> {
568+
let SALU = 1;
569+
let SOP1 = 1;
570+
let Uses = [M0];
571+
}
572+
573+
class S_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
574+
S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b32>;
575+
class S_INDIRECT_REG_WRITE_B64_pseudo<RegisterClass rc> :
576+
S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b64>;
577+
578+
579+
def V_INDIRECT_REG_WRITE_B32_V1 : V_INDIRECT_REG_WRITE_B32_pseudo<VGPR_32>;
580+
def V_INDIRECT_REG_WRITE_B32_V2 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_64>;
581+
def V_INDIRECT_REG_WRITE_B32_V3 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_96>;
582+
def V_INDIRECT_REG_WRITE_B32_V4 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_128>;
583+
def V_INDIRECT_REG_WRITE_B32_V5 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_160>;
584+
def V_INDIRECT_REG_WRITE_B32_V8 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_256>;
585+
def V_INDIRECT_REG_WRITE_B32_V16 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_512>;
586+
def V_INDIRECT_REG_WRITE_B32_V32 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_1024>;
587+
588+
def S_INDIRECT_REG_WRITE_B32_V1 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_32>;
589+
def S_INDIRECT_REG_WRITE_B32_V2 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_64>;
590+
def S_INDIRECT_REG_WRITE_B32_V3 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_96>;
591+
def S_INDIRECT_REG_WRITE_B32_V4 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_128>;
592+
def S_INDIRECT_REG_WRITE_B32_V5 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_160>;
593+
def S_INDIRECT_REG_WRITE_B32_V8 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_256>;
594+
def S_INDIRECT_REG_WRITE_B32_V16 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_512>;
595+
def S_INDIRECT_REG_WRITE_B32_V32 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_1024>;
596+
597+
def S_INDIRECT_REG_WRITE_B64_V1 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_64>;
598+
def S_INDIRECT_REG_WRITE_B64_V2 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_128>;
599+
def S_INDIRECT_REG_WRITE_B64_V4 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_256>;
600+
def S_INDIRECT_REG_WRITE_B64_V8 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_512>;
601+
def S_INDIRECT_REG_WRITE_B64_V16 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_1024>;
602+
603+
544604
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
545605
let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
546606
def _SAVE : PseudoInstSI <

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,17 @@ class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
9797
let has_sdst = 0;
9898
}
9999

100+
// Special case for movreld where sdst is treated as a use operand.
101+
class SOP1_32_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
102+
opName, (outs), (ins SReg_32:$sdst, SSrc_b32:$src0),
103+
"$sdst, $src0", pattern>;
104+
105+
// Special case for movreld where sdst is treated as a use operand.
106+
class SOP1_64_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
107+
opName, (outs), (ins SReg_64:$sdst, SSrc_b64:$src0),
108+
"$sdst, $src0", pattern
109+
>;
110+
100111
class SOP1_0_32R <string opName, list<dag> pattern = []> : SOP1_Pseudo <
101112
opName, (outs), (ins SReg_32:$src0),
102113
"$src0", pattern> {
@@ -267,8 +278,8 @@ def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64">;
267278
let Uses = [M0] in {
268279
def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
269280
def S_MOVRELS_B64 : SOP1_64R <"s_movrels_b64">;
270-
def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
271-
def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
281+
def S_MOVRELD_B32 : SOP1_32_movreld <"s_movreld_b32">;
282+
def S_MOVRELD_B64 : SOP1_64_movreld <"s_movreld_b64">;
272283
} // End Uses = [M0]
273284

274285
let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in {

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -812,29 +812,6 @@ def V_MOV_B32_indirect : VPseudoInstSI<(outs),
812812
let SubtargetPredicate = isGFX8GFX9;
813813
}
814814

815-
// This is a pseudo variant of the v_movreld_b32 (or v_mov_b32
816-
// expecting to be executed with gpr indexing mode enabled)
817-
// instruction in which the vector operand appears only twice, once as
818-
// def and once as use. Using this pseudo avoids problems with the Two
819-
// Address instructions pass.
820-
class V_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
821-
(outs rc:$vdst),
822-
(ins rc:$vsrc, VSrc_b32:$val, i32imm:$subreg)> {
823-
let VOP1 = 1;
824-
825-
let Constraints = "$vsrc = $vdst";
826-
let Uses = [M0, EXEC];
827-
}
828-
829-
def V_INDIRECT_REG_WRITE_B32_V1 : V_INDIRECT_REG_WRITE_B32_pseudo<VGPR_32>;
830-
def V_INDIRECT_REG_WRITE_B32_V2 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_64>;
831-
def V_INDIRECT_REG_WRITE_B32_V3 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_96>;
832-
def V_INDIRECT_REG_WRITE_B32_V4 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_128>;
833-
def V_INDIRECT_REG_WRITE_B32_V5 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_160>;
834-
def V_INDIRECT_REG_WRITE_B32_V8 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_256>;
835-
def V_INDIRECT_REG_WRITE_B32_V16 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_512>;
836-
def V_INDIRECT_REG_WRITE_B32_V32 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_1024>;
837-
838815
let OtherPredicates = [isGFX8Plus] in {
839816

840817
def : GCNPat <

llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,12 +422,12 @@ body: |
422422
423423
bb.2:
424424
$m0 = S_MOV_B32 0
425-
$sgpr0 = S_MOVRELD_B32 $sgpr0, implicit $m0
425+
S_MOVRELD_B32 $sgpr0, $sgpr0, implicit $m0
426426
S_BRANCH %bb.3
427427
428428
bb.3:
429429
$m0 = S_MOV_B32 0
430-
$sgpr0_sgpr1 = S_MOVRELD_B64 $sgpr0_sgpr1, implicit $m0
430+
S_MOVRELD_B64 $sgpr0_sgpr1, $sgpr0_sgpr1, implicit $m0
431431
S_ENDPGM 0
432432
...
433433

0 commit comments

Comments
 (0)