Skip to content

Commit bef10c9

Browse files
committed
WIP: AMDGPU: Implement getRegSequenceLikeInputs for v_pk_mov_b32
In principle we need this analysis to avoid regressions when using v_pk_mov_b32 when shuffling to physical register inputs. However, as it stands this only introduces regressions by decomposing every useful case where we benefit from the instruction.
1 parent 10c3b75 commit bef10c9

13 files changed

+2242
-1698
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9725,6 +9725,40 @@ MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
97259725
return nullptr;
97269726
}
97279727

9728+
bool SIInstrInfo::getRegSequenceLikeInputs(
9729+
const MachineInstr &MI, unsigned DefIdx,
9730+
SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
9731+
assert(MI.getOpcode() == AMDGPU::V_PK_MOV_B32 &&
9732+
"v_pk_mov_b32 is the only reg-sequence like instruction");
9733+
assert(DefIdx == 0);
9734+
9735+
unsigned Src0Mods = MI.getOperand(1).getImm();
9736+
const MachineOperand &Src0 = MI.getOperand(2);
9737+
unsigned Src1Mods = MI.getOperand(3).getImm();
9738+
const MachineOperand &Src1 = MI.getOperand(4);
9739+
9740+
unsigned SubReg0 =
9741+
Src0Mods & SISrcMods::OP_SEL_0 ? AMDGPU::sub1 : AMDGPU::sub0;
9742+
unsigned SubReg1 =
9743+
Src1Mods & SISrcMods::OP_SEL_0 ? AMDGPU::sub1 : AMDGPU::sub0;
9744+
9745+
if (!Src0.isUndef()) {
9746+
// src0 will provide the result sub0 from src0.
9747+
SubReg0 = RI.composeSubRegIndices(Src0.getSubReg(), SubReg0);
9748+
InputRegs.push_back(
9749+
RegSubRegPairAndIdx(Src0.getReg(), SubReg0, AMDGPU::sub0));
9750+
}
9751+
9752+
if (!Src1.isUndef()) {
9753+
// src1 will provide the result's sub1 from src1.
9754+
SubReg1 = RI.composeSubRegIndices(Src1.getSubReg(), SubReg1);
9755+
InputRegs.push_back(
9756+
RegSubRegPairAndIdx(Src1.getReg(), SubReg1, AMDGPU::sub1));
9757+
}
9758+
9759+
return true;
9760+
}
9761+
97289762
unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
97299763
const MachineInstr &MI,
97309764
unsigned *PredCost) const {

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,6 +1437,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
14371437
int FrameIndex,
14381438
LiveIntervals *LIS = nullptr,
14391439
VirtRegMap *VRM = nullptr) const override;
1440+
bool getRegSequenceLikeInputs(
1441+
const MachineInstr &MI, unsigned DefIdx,
1442+
SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const override;
14401443

14411444
unsigned getInstrLatency(const InstrItineraryData *ItinData,
14421445
const MachineInstr &MI,

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1107,7 +1107,7 @@ let isCommutable = 1, isReMaterializable = 1 in {
11071107
defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
11081108
} // End SubtargetPredicate = HasPackedFP32Ops
11091109

1110-
let SubtargetPredicate = HasPkMovB32 in
1110+
let SubtargetPredicate = HasPkMovB32, isRegSequence = 1 in
11111111
defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
11121112
} // End isCommutable = 1, isReMaterializable = 1
11131113

llvm/test/CodeGen/AMDGPU/reg-sequence-like-v-pk-mov-b32.mir

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ body: |
1515
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1616
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
1717
; CHECK-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 12, [[REG_SEQUENCE]], 8, [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec
18-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub1
19-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub0
18+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
19+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
2020
; CHECK-NEXT: $vgpr4 = COPY [[COPY2]]
2121
; CHECK-NEXT: $vgpr5 = COPY [[COPY3]]
2222
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr4, implicit $vgpr5
@@ -49,8 +49,8 @@ body: |
4949
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
5050
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
5151
; CHECK-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 12, [[REG_SEQUENCE]], 8, [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec
52-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub1
53-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub0
52+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
53+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
5454
; CHECK-NEXT: $vgpr4 = COPY [[COPY4]]
5555
; CHECK-NEXT: $vgpr5 = COPY [[COPY5]]
5656
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr4, implicit $vgpr5
@@ -118,7 +118,7 @@ body: |
118118
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
119119
; CHECK-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 12, [[REG_SEQUENCE]], 8, [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec
120120
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub1
121-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub0
121+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
122122
; CHECK-NEXT: $vgpr4 = COPY [[COPY4]]
123123
; CHECK-NEXT: $vgpr5 = COPY [[COPY5]]
124124
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr4, implicit $vgpr5
@@ -154,7 +154,7 @@ body: |
154154
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
155155
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
156156
; CHECK-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 12, [[REG_SEQUENCE]], 8, [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec
157-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub1
157+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
158158
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub0
159159
; CHECK-NEXT: $vgpr4 = COPY [[COPY4]]
160160
; CHECK-NEXT: $vgpr5 = COPY [[COPY5]]
@@ -191,7 +191,7 @@ body: |
191191
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
192192
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
193193
; CHECK-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 12, undef [[REG_SEQUENCE]], 8, [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec
194-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub1
194+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
195195
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub0
196196
; CHECK-NEXT: $vgpr4 = COPY [[COPY4]]
197197
; CHECK-NEXT: $vgpr5 = COPY [[COPY5]]
@@ -229,7 +229,7 @@ body: |
229229
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
230230
; CHECK-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 12, [[REG_SEQUENCE]], 8, undef [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec
231231
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub1
232-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub0
232+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
233233
; CHECK-NEXT: $vgpr4 = COPY [[COPY4]]
234234
; CHECK-NEXT: $vgpr5 = COPY [[COPY5]]
235235
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr4, implicit $vgpr5
@@ -304,8 +304,8 @@ body: |
304304
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
305305
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2
306306
; CHECK-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 12, [[REG_SEQUENCE]].sub0_sub1, 8, [[REG_SEQUENCE1]].sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
307-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub1
308-
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub0
307+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
308+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
309309
; CHECK-NEXT: $vgpr4 = COPY [[COPY6]]
310310
; CHECK-NEXT: $vgpr5 = COPY [[COPY7]]
311311
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr4, implicit $vgpr5
@@ -345,8 +345,8 @@ body: |
345345
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
346346
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2
347347
; CHECK-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 8, [[REG_SEQUENCE]].sub0_sub1, 12, [[REG_SEQUENCE1]].sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
348-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub1
349-
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub0
348+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
349+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
350350
; CHECK-NEXT: $vgpr4 = COPY [[COPY6]]
351351
; CHECK-NEXT: $vgpr5 = COPY [[COPY7]]
352352
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr4, implicit $vgpr5
@@ -388,8 +388,8 @@ body: |
388388
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
389389
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
390390
; CHECK-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 12, [[REG_SEQUENCE]].sub2_sub3, 12, [[REG_SEQUENCE1]].sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
391-
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub1
392-
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_PK_MOV_B32_]].sub0
391+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
392+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
393393
; CHECK-NEXT: $vgpr4 = COPY [[COPY8]]
394394
; CHECK-NEXT: $vgpr5 = COPY [[COPY9]]
395395
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr4, implicit $vgpr5

0 commit comments

Comments
 (0)