Skip to content

Commit 3fa5707

Browse files
arsenm authored and svkeerthy committed
AMDGPU: Remove redundant operand folding checks (#140587)
This was pre-filtering out a specific situation from being added to the fold candidate list. The operand legality will ultimately be checked with isOperandLegal before the fold is performed, so I don't see the benefit of pre-filtering this one case.
1 parent bebe142 commit 3fa5707

File tree

3 files changed: 107 additions (+107) and 18 deletions (-18).

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -777,24 +777,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
777777
return true;
778778
}
779779

780-
// Check the case where we might introduce a second constant operand to a
781-
// scalar instruction
782-
if (TII->isSALU(MI->getOpcode())) {
783-
const MCInstrDesc &InstDesc = MI->getDesc();
784-
const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
785-
786-
// Fine if the operand can be encoded as an inline constant
787-
if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) {
788-
// Otherwise check for another constant
789-
for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
790-
auto &Op = MI->getOperand(i);
791-
if (OpNo != i && !Op.isReg() &&
792-
!TII->isInlineConstant(Op, InstDesc.operands()[i]))
793-
return false;
794-
}
795-
}
796-
}
797-
798780
appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
799781
return true;
800782
}

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6063,6 +6063,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
60636063
!isInlineConstant(Op, InstDesc.operands()[i]) &&
60646064
!Op.isIdenticalTo(*MO))
60656065
return false;
6066+
6067+
// Do not fold a frame index into an instruction that already has a frame
6068+
// index. The frame index handling code doesn't handle fixing up operand
6069+
// constraints if there are multiple indexes.
6070+
if (Op.isFI() && MO->isFI())
6071+
return false;
60666072
}
60676073
} else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
60686074
isF16PseudoScalarTrans(MI.getOpcode())) {

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,104 @@ body: |
573573
S_ENDPGM 0, implicit %2
574574
575575
...
576+
577+
---
578+
name: no_fold_multiple_fi_s_cselect_b32
579+
tracksRegLiveness: true
580+
stack:
581+
- { id: 0, size: 64, alignment: 4 }
582+
- { id: 1, size: 32, alignment: 4 }
583+
body: |
584+
bb.0:
585+
; CHECK-LABEL: name: no_fold_multiple_fi_s_cselect_b32
586+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1
587+
; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_MOV_B32_]], %stack.0, implicit undef $scc
588+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
589+
%0:sreg_32 = S_MOV_B32 %stack.0
590+
%1:sreg_32 = S_MOV_B32 %stack.1
591+
%2:sreg_32 = S_CSELECT_B32 killed %1, killed %0, implicit undef $scc
592+
S_ENDPGM 0, implicit %2
593+
594+
...
595+
596+
---
597+
name: no_fold_multiple_fi_v_cndmask_b32_e64
598+
tracksRegLiveness: true
599+
stack:
600+
- { id: 0, size: 64, alignment: 4 }
601+
- { id: 1, size: 32, alignment: 4 }
602+
body: |
603+
bb.0:
604+
liveins: $sgpr8_sgpr9
605+
; GFX9-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64
606+
; GFX9: liveins: $sgpr8_sgpr9
607+
; GFX9-NEXT: {{ $}}
608+
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
609+
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
610+
; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
611+
; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[V_MOV_B32_e32_]], 0, killed [[V_MOV_B32_e32_1]], [[COPY]], implicit $exec
612+
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
613+
;
614+
; GFX10-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64
615+
; GFX10: liveins: $sgpr8_sgpr9
616+
; GFX10-NEXT: {{ $}}
617+
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
618+
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
619+
; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, %stack.0, 0, killed [[V_MOV_B32_e32_]], [[COPY]], implicit $exec
620+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
621+
;
622+
; GFX12-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64
623+
; GFX12: liveins: $sgpr8_sgpr9
624+
; GFX12-NEXT: {{ $}}
625+
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
626+
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
627+
; GFX12-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, %stack.0, 0, killed [[V_MOV_B32_e32_]], [[COPY]], implicit $exec
628+
; GFX12-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
629+
%0:sreg_64_xexec = COPY $sgpr8_sgpr9
630+
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
631+
%2:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
632+
%3:vgpr_32 = V_CNDMASK_B32_e64 0, killed %1, 0, killed %2, %0, implicit $exec
633+
S_ENDPGM 0, implicit %3
634+
635+
...
636+
637+
---
638+
name: no_fold_multiple_fi_v_cndmask_b32_e32
639+
tracksRegLiveness: true
640+
stack:
641+
- { id: 0, size: 64, alignment: 4 }
642+
- { id: 1, size: 32, alignment: 4 }
643+
body: |
644+
bb.0:
645+
liveins: $sgpr8_sgpr9
646+
; GFX9-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32
647+
; GFX9: liveins: $sgpr8_sgpr9
648+
; GFX9-NEXT: {{ $}}
649+
; GFX9-NEXT: $vcc = COPY $sgpr8_sgpr9
650+
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
651+
; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
652+
; GFX9-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 killed [[V_MOV_B32_e32_]], killed [[V_MOV_B32_e32_1]], implicit $vcc, implicit $exec
653+
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]]
654+
;
655+
; GFX10-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32
656+
; GFX10: liveins: $sgpr8_sgpr9
657+
; GFX10-NEXT: {{ $}}
658+
; GFX10-NEXT: $vcc = COPY $sgpr8_sgpr9
659+
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
660+
; GFX10-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 %stack.0, killed [[V_MOV_B32_e32_]], implicit $vcc, implicit $exec
661+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]]
662+
;
663+
; GFX12-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32
664+
; GFX12: liveins: $sgpr8_sgpr9
665+
; GFX12-NEXT: {{ $}}
666+
; GFX12-NEXT: $vcc = COPY $sgpr8_sgpr9
667+
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
668+
; GFX12-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 %stack.0, killed [[V_MOV_B32_e32_]], implicit $vcc, implicit $exec
669+
; GFX12-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]]
670+
$vcc = COPY $sgpr8_sgpr9
671+
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
672+
%2:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
673+
%3:vgpr_32 = V_CNDMASK_B32_e32 killed %1, killed %2, implicit $vcc, implicit $exec
674+
S_ENDPGM 0, implicit %3
675+
676+
...

0 commit comments

Comments
 (0)