-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Teach isOperandLegal about SALU literal restrictions #127626
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Teach isOperandLegal about SALU literal restrictions #127626
Conversation
isOperandLegal mostly implemented the VALU operand rules, and largely ignored SALU restrictions. This theoretically avoids folding literals into SALU insts which already have a literal operand. This issue is currently avoided due to a bug in SIFoldOperands; this change will allow using raw operand legality rules. This breaks the formation of s_fmaak_f32 in SIFoldOperands, but it probably should not have been forming there in the first place. TwoAddressInsts or RA should generally handle that, and this only worked by accident.
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesisOperandLegal mostly implemented the VALU operand rules, and This breaks the formation of s_fmaak_f32 in SIFoldOperands, Full diff: https://github.com/llvm/llvm-project/pull/127626.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ceab6c9dcca34..7dace11d208a0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5931,11 +5931,15 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
if (!MO)
MO = &MI.getOperand(OpIdx);
- const MachineOperand *UsedLiteral = nullptr;
+ const bool IsInlineConst = !MO->isReg() && isInlineConstant(*MO, OpInfo);
- int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
- int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
- if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
+ if (isVALU(MI) && !IsInlineConst && usesConstantBus(MRI, *MO, OpInfo)) {
+ const MachineOperand *UsedLiteral = nullptr;
+
+ int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
+ int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
+
+ // TODO: Be more permissive with frame indexes.
if (!MO->isReg() && !isInlineConstant(*MO, OpInfo)) {
if (!LiteralLimit--)
return false;
@@ -5974,9 +5978,19 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
}
}
- } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
- isF16PseudoScalarTrans(MI.getOpcode()) &&
- isInlineConstant(*MO, OpInfo)) {
+ } else if (!IsInlineConst && !MO->isReg() && isSALU(MI)) {
+ // There can be at most one literal operand, but it can be repeated.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (i == OpIdx)
+ continue;
+ const MachineOperand &Op = MI.getOperand(i);
+ if (!Op.isReg() && !Op.isFI() &&
+ !isInlineConstant(Op, InstDesc.operands()[i]) &&
+ !Op.isIdenticalTo(*MO))
+ return false;
+ }
+ } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
+ isF16PseudoScalarTrans(MI.getOpcode())) {
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
index 08693ec9db1d4..2492eb2982aac 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
@@ -133,7 +133,8 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode
+ ; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 %noninlinable, [[COPY]], 1056964608, implicit $mode
; CHECK-NEXT: $sgpr0 = COPY %fma
%0:sreg_32 = COPY $sgpr0
%inlinable:sreg_32 = S_MOV_B32 1056964608
@@ -152,7 +153,8 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode
+ ; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], %noninlinable, 1056964608, implicit $mode
; CHECK-NEXT: $sgpr0 = COPY %fma
%0:sreg_32 = COPY $sgpr0
%inlinable:sreg_32 = S_MOV_B32 1056964608
@@ -210,7 +212,8 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], 1056964608, 1234567890, implicit $mode
+ ; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, %noninlinable, implicit $mode
; CHECK-NEXT: $sgpr0 = COPY %fma
%0:sreg_32 = COPY $sgpr0
%inlinable:sreg_32 = S_MOV_B32 1056964608
diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir
index 5f985605c082d..c8afb89aa272a 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir
@@ -69,3 +69,202 @@ body: |
%0:sreg_32 = S_MOV_B32 63
%1:sreg_32 = S_ADD_I32 %stack.0, %0, implicit-def $scc
...
+
+# GCN-LABEL: name: test_no_fold_literal_already_inline_lhs{{$}}
+# GCN: %0:sreg_32 = S_MOV_B32 80
+# GCN-NEXT: %1:sreg_32 = S_ADD_I32 70, %0
+---
+name: test_no_fold_literal_already_inline_lhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ %1:sreg_32 = S_ADD_I32 70, %0, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_no_fold_literal_already_inline_rhs{{$}}
+# GCN: %0:sreg_32 = S_MOV_B32 80
+# GCN-NEXT: %1:sreg_32 = S_ADD_I32 %0, 70
+---
+name: test_no_fold_literal_already_inline_rhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ %1:sreg_32 = S_ADD_I32 %0, 70, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_fold_literal_inlineimm_lhs{{$}}
+# GCN: %1:sreg_32 = S_ADD_I32 64, 80
+---
+name: test_fold_literal_inlineimm_lhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ %1:sreg_32 = S_ADD_I32 64, %0, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_fold_literal_inlineimm_rhs{{$}}
+# GCN: %1:sreg_32 = S_ADD_I32 80, 64
+---
+name: test_fold_literal_inlineimm_rhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ %1:sreg_32 = S_ADD_I32 %0, 64, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_fold_same_literal_2x{{$}}
+# GCN: %2:sreg_32 = S_ADD_I32 70, %1
+---
+name: test_fold_same_literal_2x
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 70
+ %1:sreg_32 = S_MOV_B32 70
+ %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_fold_same_literal_lhs{{$}}
+# GCN: %1:sreg_32 = S_ADD_I32 70, %0
+---
+name: test_fold_same_literal_lhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 70
+ %1:sreg_32 = S_ADD_I32 70, %0, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_fold_same_literal_rhs{{$}}
+# GCN: %1:sreg_32 = S_ADD_I32 %0, 70
+---
+name: test_fold_same_literal_rhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 70
+ %1:sreg_32 = S_ADD_I32 %0, 70, implicit-def $scc
+...
+
+
+# GCN-LABEL: name: test_s_cselect_b32_2x_literal_fold{{$}}
+# GCN: %2:sreg_32 = S_CSELECT_B32 70, %1, implicit $scc
+---
+name: test_s_cselect_b32_2x_literal_fold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 70
+ %1:sreg_32 = S_MOV_B32 80
+ $scc = IMPLICIT_DEF
+ %2:sreg_32 = S_CSELECT_B32 %0, %1, implicit $scc
+...
+
+# GCN-LABEL: name: test_s_cselect_b32_fold_literal_literal_lhs{{$}}
+# GCN: %1:sreg_32 = S_CSELECT_B32 70, %0, implicit $scc
+---
+name: test_s_cselect_b32_fold_literal_literal_lhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ $scc = IMPLICIT_DEF
+ %1:sreg_32 = S_CSELECT_B32 70, %0, implicit $scc
+...
+
+# GCN-LABEL: name: test_s_cselect_b32_fold_literal_literal_rhs{{$}}
+# GCN: %1:sreg_32 = S_CSELECT_B32 %0, 70, implicit $scc
+---
+name: test_s_cselect_b32_fold_literal_literal_rhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ $scc = IMPLICIT_DEF
+ %1:sreg_32 = S_CSELECT_B32 %0, 70, implicit $scc
+...
+
+# GCN-LABEL: name: test_s_cselect_b32_fold_literal_inlineimm_lhs{{$}}
+# GCN: %1:sreg_32 = S_CSELECT_B32 64, 80, implicit $scc
+---
+name: test_s_cselect_b32_fold_literal_inlineimm_lhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ $scc = IMPLICIT_DEF
+ %1:sreg_32 = S_CSELECT_B32 64, %0, implicit $scc
+...
+
+# GCN-LABEL: name: test_s_cselect_b32_fold_literal_inlineimm_rhs{{$}}
+# GCN: %1:sreg_32 = S_CSELECT_B32 80, 64, implicit $scc
+---
+name: test_s_cselect_b32_fold_literal_inlineimm_rhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ $scc = IMPLICIT_DEF
+ %1:sreg_32 = S_CSELECT_B32 %0, 64, implicit $scc
+...
+
+# GCN-LABEL: name: test_s_cmp_b32_2x_literal_fold{{$}}
+# GCN: S_CMP_EQ_U32 70, %1, implicit-def $scc
+---
+name: test_s_cmp_b32_2x_literal_fold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 70
+ %1:sreg_32 = S_MOV_B32 80
+ $scc = IMPLICIT_DEF
+ S_CMP_EQ_U32 %0, %1, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_s_cmp_b32_literal_literal_lhs{{$}}
+# GCN: S_CMP_EQ_U32 70, %0, implicit-def $scc
+---
+name: test_s_cmp_b32_literal_literal_lhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ S_CMP_EQ_U32 70, %0, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_s_cmp_b32_literal_literal_rhs{{$}}
+# GCN: S_CMP_EQ_U32 %0, 70, implicit-def $scc
+---
+name: test_s_cmp_b32_literal_literal_rhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ S_CMP_EQ_U32 %0, 70, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_s_cmp_b32_literal_inlineimm_lhs{{$}}
+# GCN: S_CMP_EQ_U32 64, 80, implicit-def $scc
+---
+name: test_s_cmp_b32_literal_inlineimm_lhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ S_CMP_EQ_U32 64, %0, implicit-def $scc
+...
+
+# GCN-LABEL: name: test_s_cmp_b32_literal_inlineimm_rhs{{$}}
+# GCN: S_CMP_EQ_U32 80, 64, implicit-def $scc
+---
+name: test_s_cmp_b32_literal_inlineimm_rhs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 80
+ S_CMP_EQ_U32 %0, 64, implicit-def $scc
+...
|
isOperandLegal mostly implemented the VALU operand rules, and
largely ignored SALU restrictions. This theoretically avoids
folding literals into SALU insts which already have a literal
operand. This issue is currently avoided due to a bug in
SIFoldOperands; this change will allow using raw operand
legality rules.
This breaks the formation of s_fmaak_f32 in SIFoldOperands,
but it probably should not have been forming there in the first
place. TwoAddressInsts or RA should generally handle that,
and this only worked by accident.