Skip to content

AMDGPU: Teach isOperandLegal about SALU literal restrictions #127626

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5931,11 +5931,15 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
if (!MO)
MO = &MI.getOperand(OpIdx);

const MachineOperand *UsedLiteral = nullptr;
const bool IsInlineConst = !MO->isReg() && isInlineConstant(*MO, OpInfo);

int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
if (isVALU(MI) && !IsInlineConst && usesConstantBus(MRI, *MO, OpInfo)) {
const MachineOperand *UsedLiteral = nullptr;

int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;

// TODO: Be more permissive with frame indexes.
if (!MO->isReg() && !isInlineConstant(*MO, OpInfo)) {
if (!LiteralLimit--)
return false;
Expand Down Expand Up @@ -5974,9 +5978,19 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
}
}
} else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
isF16PseudoScalarTrans(MI.getOpcode()) &&
isInlineConstant(*MO, OpInfo)) {
} else if (!IsInlineConst && !MO->isReg() && isSALU(MI)) {
// There can be at most one literal operand, but it can be repeated.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (i == OpIdx)
continue;
const MachineOperand &Op = MI.getOperand(i);
if (!Op.isReg() && !Op.isFI() &&
!isInlineConstant(Op, InstDesc.operands()[i]) &&
!Op.isIdenticalTo(*MO))
return false;
}
} else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
isF16PseudoScalarTrans(MI.getOpcode())) {
return false;
}

Expand Down
9 changes: 6 additions & 3 deletions llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode
; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 %noninlinable, [[COPY]], 1056964608, implicit $mode
; CHECK-NEXT: $sgpr0 = COPY %fma
%0:sreg_32 = COPY $sgpr0
%inlinable:sreg_32 = S_MOV_B32 1056964608
Expand All @@ -152,7 +153,8 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode
; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], %noninlinable, 1056964608, implicit $mode
; CHECK-NEXT: $sgpr0 = COPY %fma
%0:sreg_32 = COPY $sgpr0
%inlinable:sreg_32 = S_MOV_B32 1056964608
Expand Down Expand Up @@ -210,7 +212,8 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], 1056964608, 1234567890, implicit $mode
; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, %noninlinable, implicit $mode
; CHECK-NEXT: $sgpr0 = COPY %fma
%0:sreg_32 = COPY $sgpr0
%inlinable:sreg_32 = S_MOV_B32 1056964608
Expand Down
199 changes: 199 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,202 @@ body: |
%0:sreg_32 = S_MOV_B32 63
%1:sreg_32 = S_ADD_I32 %stack.0, %0, implicit-def $scc
...

# GCN-LABEL: name: test_no_fold_literal_already_inline_lhs{{$}}
# GCN: %0:sreg_32 = S_MOV_B32 80
# GCN-NEXT: %1:sreg_32 = S_ADD_I32 70, %0
---
name: test_no_fold_literal_already_inline_lhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
%1:sreg_32 = S_ADD_I32 70, %0, implicit-def $scc
...

# GCN-LABEL: name: test_no_fold_literal_already_inline_rhs{{$}}
# GCN: %0:sreg_32 = S_MOV_B32 80
# GCN-NEXT: %1:sreg_32 = S_ADD_I32 %0, 70
---
name: test_no_fold_literal_already_inline_rhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
%1:sreg_32 = S_ADD_I32 %0, 70, implicit-def $scc
...

# GCN-LABEL: name: test_fold_literal_inlineimm_lhs{{$}}
# GCN: %1:sreg_32 = S_ADD_I32 64, 80
---
name: test_fold_literal_inlineimm_lhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
%1:sreg_32 = S_ADD_I32 64, %0, implicit-def $scc
...

# GCN-LABEL: name: test_fold_literal_inlineimm_rhs{{$}}
# GCN: %1:sreg_32 = S_ADD_I32 80, 64
---
name: test_fold_literal_inlineimm_rhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
%1:sreg_32 = S_ADD_I32 %0, 64, implicit-def $scc
...

# GCN-LABEL: name: test_fold_same_literal_2x{{$}}
# GCN: %2:sreg_32 = S_ADD_I32 70, %1
---
name: test_fold_same_literal_2x
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 70
%1:sreg_32 = S_MOV_B32 70
%2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc
...

# GCN-LABEL: name: test_fold_same_literal_lhs{{$}}
# GCN: %1:sreg_32 = S_ADD_I32 70, %0
---
name: test_fold_same_literal_lhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 70
%1:sreg_32 = S_ADD_I32 70, %0, implicit-def $scc
...

# GCN-LABEL: name: test_fold_same_literal_rhs{{$}}
# GCN: %1:sreg_32 = S_ADD_I32 %0, 70
---
name: test_fold_same_literal_rhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 70
%1:sreg_32 = S_ADD_I32 %0, 70, implicit-def $scc
...


# GCN-LABEL: name: test_s_cselect_b32_2x_literal_fold{{$}}
# GCN: %2:sreg_32 = S_CSELECT_B32 70, %1, implicit $scc
---
name: test_s_cselect_b32_2x_literal_fold
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 70
%1:sreg_32 = S_MOV_B32 80
$scc = IMPLICIT_DEF
%2:sreg_32 = S_CSELECT_B32 %0, %1, implicit $scc
...

# GCN-LABEL: name: test_s_cselect_b32_fold_literal_literal_lhs{{$}}
# GCN: %1:sreg_32 = S_CSELECT_B32 70, %0, implicit $scc
---
name: test_s_cselect_b32_fold_literal_literal_lhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
$scc = IMPLICIT_DEF
%1:sreg_32 = S_CSELECT_B32 70, %0, implicit $scc
...

# GCN-LABEL: name: test_s_cselect_b32_fold_literal_literal_rhs{{$}}
# GCN: %1:sreg_32 = S_CSELECT_B32 %0, 70, implicit $scc
---
name: test_s_cselect_b32_fold_literal_literal_rhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
$scc = IMPLICIT_DEF
%1:sreg_32 = S_CSELECT_B32 %0, 70, implicit $scc
...

# GCN-LABEL: name: test_s_cselect_b32_fold_literal_inlineimm_lhs{{$}}
# GCN: %1:sreg_32 = S_CSELECT_B32 64, 80, implicit $scc
---
name: test_s_cselect_b32_fold_literal_inlineimm_lhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
$scc = IMPLICIT_DEF
%1:sreg_32 = S_CSELECT_B32 64, %0, implicit $scc
...

# GCN-LABEL: name: test_s_cselect_b32_fold_literal_inlineimm_rhs{{$}}
# GCN: %1:sreg_32 = S_CSELECT_B32 80, 64, implicit $scc
---
name: test_s_cselect_b32_fold_literal_inlineimm_rhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
$scc = IMPLICIT_DEF
%1:sreg_32 = S_CSELECT_B32 %0, 64, implicit $scc
...

# GCN-LABEL: name: test_s_cmp_b32_2x_literal_fold{{$}}
# GCN: S_CMP_EQ_U32 70, %1, implicit-def $scc
---
name: test_s_cmp_b32_2x_literal_fold
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 70
%1:sreg_32 = S_MOV_B32 80
$scc = IMPLICIT_DEF
S_CMP_EQ_U32 %0, %1, implicit-def $scc
...

# GCN-LABEL: name: test_s_cmp_b32_literal_literal_lhs{{$}}
# GCN: S_CMP_EQ_U32 70, %0, implicit-def $scc
---
name: test_s_cmp_b32_literal_literal_lhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
S_CMP_EQ_U32 70, %0, implicit-def $scc
...

# GCN-LABEL: name: test_s_cmp_b32_literal_literal_rhs{{$}}
# GCN: S_CMP_EQ_U32 %0, 70, implicit-def $scc
---
name: test_s_cmp_b32_literal_literal_rhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
S_CMP_EQ_U32 %0, 70, implicit-def $scc
...

# GCN-LABEL: name: test_s_cmp_b32_literal_inlineimm_lhs{{$}}
# GCN: S_CMP_EQ_U32 64, 80, implicit-def $scc
---
name: test_s_cmp_b32_literal_inlineimm_lhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
S_CMP_EQ_U32 64, %0, implicit-def $scc
...

# GCN-LABEL: name: test_s_cmp_b32_literal_inlineimm_rhs{{$}}
# GCN: S_CMP_EQ_U32 80, 64, implicit-def $scc
---
name: test_s_cmp_b32_literal_inlineimm_rhs
tracksRegLiveness: true
body: |
bb.0:
%0:sreg_32 = S_MOV_B32 80
S_CMP_EQ_U32 %0, 64, implicit-def $scc
...