Skip to content

Commit b3bb5c3

Browse files
committed
[AMDGPU][GlobalISel] Use scalar min/max instructions
SALU min/max s32 instructions exist, so use them. This means that regbankselect can handle min/max much like add/sub/mul/shifts. Differential Revision: https://reviews.llvm.org/D96047
1 parent e4a503a commit b3bb5c3

File tree

10 files changed

+1554
-2818
lines changed

10 files changed

+1554
-2818
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 11 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -591,21 +591,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
591591

592592
return AltMappings;
593593
}
594-
case TargetOpcode::G_SMIN:
595-
case TargetOpcode::G_SMAX:
596-
case TargetOpcode::G_UMIN:
597-
case TargetOpcode::G_UMAX: {
598-
static const OpRegBankEntry<3> Table[2] = {
599-
{ { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
600-
601-
// Scalar requires cmp+select, and extends if 16-bit.
602-
// FIXME: Should there be separate costs for 32 and 16-bit
603-
{ { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
604-
};
605-
606-
const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
607-
return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
608-
}
609594
case TargetOpcode::G_UADDE:
610595
case TargetOpcode::G_USUBE:
611596
case TargetOpcode::G_SADDE:
@@ -1576,23 +1561,8 @@ bool AMDGPURegisterBankInfo::applyMappingBFEIntrinsic(
15761561
return true;
15771562
}
15781563

1579-
// FIXME: Duplicated from LegalizerHelper
1580-
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
1581-
switch (Opc) {
1582-
case TargetOpcode::G_SMIN:
1583-
return CmpInst::ICMP_SLT;
1584-
case TargetOpcode::G_SMAX:
1585-
return CmpInst::ICMP_SGT;
1586-
case TargetOpcode::G_UMIN:
1587-
return CmpInst::ICMP_ULT;
1588-
case TargetOpcode::G_UMAX:
1589-
return CmpInst::ICMP_UGT;
1590-
default:
1591-
llvm_unreachable("not in integer min/max");
1592-
}
1593-
}
1594-
1595-
static unsigned minMaxToExtend(unsigned Opc) {
1564+
// Return a suitable opcode for extending the operands of Opc when widening.
1565+
static unsigned getExtendOp(unsigned Opc) {
15961566
switch (Opc) {
15971567
case TargetOpcode::G_SMIN:
15981568
case TargetOpcode::G_SMAX:
@@ -1601,7 +1571,7 @@ static unsigned minMaxToExtend(unsigned Opc) {
16011571
case TargetOpcode::G_UMAX:
16021572
return TargetOpcode::G_ZEXT;
16031573
default:
1604-
llvm_unreachable("not in integer min/max");
1574+
return TargetOpcode::G_ANYEXT;
16051575
}
16061576
}
16071577

@@ -1628,30 +1598,6 @@ unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) {
16281598
return std::make_pair(Bitcast.getReg(0), ShiftHi.getReg(0));
16291599
}
16301600

1631-
static MachineInstr *buildExpandedScalarMinMax(MachineIRBuilder &B,
1632-
CmpInst::Predicate Pred,
1633-
Register Dst, Register Src0,
1634-
Register Src1) {
1635-
const LLT CmpType = LLT::scalar(32);
1636-
auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
1637-
return B.buildSelect(Dst, Cmp, Src0, Src1);
1638-
}
1639-
1640-
// FIXME: Duplicated from LegalizerHelper, except changing the boolean type.
1641-
void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
1642-
MachineInstr &MI) const {
1643-
Register Dst = MI.getOperand(0).getReg();
1644-
Register Src0 = MI.getOperand(1).getReg();
1645-
Register Src1 = MI.getOperand(2).getReg();
1646-
1647-
const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
1648-
MachineInstr *Sel = buildExpandedScalarMinMax(B, Pred, Dst, Src0, Src1);
1649-
1650-
Register CmpReg = Sel->getOperand(1).getReg();
1651-
B.getMRI()->setRegBank(CmpReg, AMDGPU::SGPRRegBank);
1652-
MI.eraseFromParent();
1653-
}
1654-
16551601
// For cases where only a single copy is inserted for matching register banks.
16561602
// Replace the register in the instruction operand
16571603
static bool substituteSimpleCopyRegs(
@@ -2341,7 +2287,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
23412287
case AMDGPU::G_MUL:
23422288
case AMDGPU::G_SHL:
23432289
case AMDGPU::G_LSHR:
2344-
case AMDGPU::G_ASHR: {
2290+
case AMDGPU::G_ASHR:
2291+
case AMDGPU::G_SMIN:
2292+
case AMDGPU::G_SMAX:
2293+
case AMDGPU::G_UMIN:
2294+
case AMDGPU::G_UMAX: {
23452295
Register DstReg = MI.getOperand(0).getReg();
23462296
LLT DstTy = MRI.getType(DstReg);
23472297

@@ -2365,10 +2315,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
23652315
Register WideSrc0Lo, WideSrc0Hi;
23662316
Register WideSrc1Lo, WideSrc1Hi;
23672317

2318+
unsigned ExtendOp = getExtendOp(MI.getOpcode());
23682319
std::tie(WideSrc0Lo, WideSrc0Hi)
2369-
= unpackV2S16ToS32(B, MI.getOperand(1).getReg(), AMDGPU::G_ANYEXT);
2320+
= unpackV2S16ToS32(B, MI.getOperand(1).getReg(), ExtendOp);
23702321
std::tie(WideSrc1Lo, WideSrc1Hi)
2371-
= unpackV2S16ToS32(B, MI.getOperand(2).getReg(), AMDGPU::G_ANYEXT);
2322+
= unpackV2S16ToS32(B, MI.getOperand(2).getReg(), ExtendOp);
23722323
auto Lo = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
23732324
auto Hi = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
23742325
B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
@@ -2390,73 +2341,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
23902341

23912342
return;
23922343
}
2393-
case AMDGPU::G_SMIN:
2394-
case AMDGPU::G_SMAX:
2395-
case AMDGPU::G_UMIN:
2396-
case AMDGPU::G_UMAX: {
2397-
Register DstReg = MI.getOperand(0).getReg();
2398-
const RegisterBank *DstBank =
2399-
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
2400-
if (DstBank == &AMDGPU::VGPRRegBank)
2401-
break;
2402-
2403-
MachineFunction *MF = MI.getParent()->getParent();
2404-
MachineIRBuilder B(MI);
2405-
2406-
// Turn scalar min/max into a compare and select.
2407-
LLT Ty = MRI.getType(DstReg);
2408-
const LLT S32 = LLT::scalar(32);
2409-
const LLT S16 = LLT::scalar(16);
2410-
const LLT V2S16 = LLT::vector(2, 16);
2411-
2412-
if (Ty == V2S16) {
2413-
ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
2414-
B.setChangeObserver(ApplySALU);
2415-
2416-
// Need to widen to s32, and expand as cmp + select, and avoid producing
2417-
// illegal vector extends or unmerges that would need further
2418-
// legalization.
2419-
//
2420-
// TODO: Should we just readfirstlane? That should probably be handled
2421-
// with a UniformVGPR register bank that wouldn't need special
2422-
// consideration here.
2423-
2424-
Register Dst = MI.getOperand(0).getReg();
2425-
Register Src0 = MI.getOperand(1).getReg();
2426-
Register Src1 = MI.getOperand(2).getReg();
2427-
2428-
Register WideSrc0Lo, WideSrc0Hi;
2429-
Register WideSrc1Lo, WideSrc1Hi;
2430-
2431-
unsigned ExtendOp = minMaxToExtend(MI.getOpcode());
2432-
2433-
std::tie(WideSrc0Lo, WideSrc0Hi) = unpackV2S16ToS32(B, Src0, ExtendOp);
2434-
std::tie(WideSrc1Lo, WideSrc1Hi) = unpackV2S16ToS32(B, Src1, ExtendOp);
2435-
2436-
Register Lo = MRI.createGenericVirtualRegister(S32);
2437-
Register Hi = MRI.createGenericVirtualRegister(S32);
2438-
const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
2439-
buildExpandedScalarMinMax(B, Pred, Lo, WideSrc0Lo, WideSrc1Lo);
2440-
buildExpandedScalarMinMax(B, Pred, Hi, WideSrc0Hi, WideSrc1Hi);
2441-
2442-
B.buildBuildVectorTrunc(Dst, {Lo, Hi});
2443-
MI.eraseFromParent();
2444-
} else if (Ty == S16) {
2445-
ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
2446-
B.setChangeObserver(ApplySALU);
2447-
LegalizerHelper Helper(*MF, ApplySALU, B);
2448-
2449-
// Need to widen to s32, and expand as cmp + select.
2450-
if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
2451-
llvm_unreachable("widenScalar should have succeeded");
2452-
2453-
// FIXME: This is relying on widenScalar leaving MI in place.
2454-
lowerScalarMinMax(B, MI);
2455-
} else
2456-
lowerScalarMinMax(B, MI);
2457-
2458-
return;
2459-
}
24602344
case AMDGPU::G_SEXT_INREG: {
24612345
SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
24622346
if (SrcRegs.empty())

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,6 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
8484
bool applyMappingBFEIntrinsic(const OperandsMapper &OpdMapper,
8585
bool Signed) const;
8686

87-
void lowerScalarMinMax(MachineIRBuilder &B, MachineInstr &MI) const;
88-
8987
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
9088
Register Reg) const;
9189

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ body: |
1313
; CHECK-LABEL: name: smax_s32_ss
1414
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
1515
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
16-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
17-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
16+
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]]
1817
%0:_(s32) = COPY $sgpr0
1918
%1:_(s32) = COPY $sgpr1
2019
%2:_(s32) = G_SMAX %0, %1
@@ -90,9 +89,8 @@ body: |
9089
; CHECK-LABEL: name: smax_s32_ss_vgpr_use
9190
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
9291
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
93-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
94-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
95-
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
92+
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]]
93+
; CHECK: $vgpr0 = COPY [[SMAX]](s32)
9694
%0:_(s32) = COPY $sgpr0
9795
%1:_(s32) = COPY $sgpr1
9896
%2:_(s32) = G_SMAX %0, %1
@@ -114,9 +112,8 @@ body: |
114112
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
115113
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
116114
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
117-
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
118-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT]](s32), [[SEXT1]]
119-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
115+
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]]
116+
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32)
120117
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
121118
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
122119
%0:_(s32) = COPY $sgpr0
@@ -144,9 +141,8 @@ body: |
144141
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
145142
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
146143
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
147-
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
148-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT]](s32), [[SEXT1]]
149-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
144+
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]]
145+
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32)
150146
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
151147
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
152148
%0:_(s32) = COPY $sgpr0
@@ -178,11 +174,9 @@ body: |
178174
; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
179175
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
180176
; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
181-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
182-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
183-
; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[ASHR]](s32), [[ASHR1]]
184-
; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
185-
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
177+
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]]
178+
; CHECK: [[SMAX1:%[0-9]+]]:sgpr(s32) = G_SMAX [[ASHR]], [[ASHR1]]
179+
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMAX]](s32), [[SMAX1]](s32)
186180
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
187181
%0:_(<2 x s16>) = COPY $sgpr0
188182
%1:_(<2 x s16>) = COPY $sgpr1

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
3-
# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
3+
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
44

55
---
66
name: smin_s32_ss
@@ -13,9 +13,8 @@ body: |
1313
; CHECK-LABEL: name: smin_s32_ss
1414
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
1515
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
16-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
17-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
18-
; CHECK: $sgpr0 = COPY [[SELECT]](s32)
16+
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]]
17+
; CHECK: $sgpr0 = COPY [[SMIN]](s32)
1918
%0:_(s32) = COPY $sgpr0
2019
%1:_(s32) = COPY $sgpr1
2120
%2:_(s32) = G_SMIN %0, %1
@@ -93,9 +92,8 @@ body: |
9392
; CHECK-LABEL: name: smin_s32_ss_vgpr_use
9493
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
9594
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
96-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
97-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
98-
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
95+
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]]
96+
; CHECK: $vgpr0 = COPY [[SMIN]](s32)
9997
%0:_(s32) = COPY $sgpr0
10098
%1:_(s32) = COPY $sgpr1
10199
%2:_(s32) = G_SMIN %0, %1
@@ -117,9 +115,8 @@ body: |
117115
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
118116
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
119117
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
120-
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
121-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT]](s32), [[SEXT1]]
122-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
118+
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]]
119+
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32)
123120
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
124121
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
125122
%0:_(s32) = COPY $sgpr0
@@ -147,9 +144,8 @@ body: |
147144
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
148145
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
149146
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
150-
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
151-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT]](s32), [[SEXT1]]
152-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
147+
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]]
148+
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32)
153149
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
154150
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
155151
%0:_(s32) = COPY $sgpr0
@@ -181,11 +177,9 @@ body: |
181177
; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
182178
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
183179
; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
184-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
185-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
186-
; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]]
187-
; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
188-
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
180+
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]]
181+
; CHECK: [[SMIN1:%[0-9]+]]:sgpr(s32) = G_SMIN [[ASHR]], [[ASHR1]]
182+
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMIN]](s32), [[SMIN1]](s32)
189183
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
190184
%0:_(<2 x s16>) = COPY $sgpr0
191185
%1:_(<2 x s16>) = COPY $sgpr1

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,8 @@ body: |
1313
; CHECK-LABEL: name: umax_s32_ss
1414
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
1515
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
16-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
17-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
18-
; CHECK: $sgpr0 = COPY [[SELECT]](s32)
16+
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]]
17+
; CHECK: $sgpr0 = COPY [[UMAX]](s32)
1918
%0:_(s32) = COPY $sgpr0
2019
%1:_(s32) = COPY $sgpr1
2120
%2:_(s32) = G_UMAX %0, %1
@@ -93,9 +92,8 @@ body: |
9392
; CHECK-LABEL: name: umax_s32_ss_vgpr_use
9493
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
9594
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
96-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
97-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
98-
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
95+
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]]
96+
; CHECK: $vgpr0 = COPY [[UMAX]](s32)
9997
%0:_(s32) = COPY $sgpr0
10098
%1:_(s32) = COPY $sgpr1
10199
%2:_(s32) = G_UMAX %0, %1
@@ -117,9 +115,8 @@ body: |
117115
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
118116
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
119117
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
120-
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
121-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[ZEXT]](s32), [[ZEXT1]]
122-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
118+
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]]
119+
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32)
123120
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
124121
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
125122
%0:_(s32) = COPY $sgpr0
@@ -147,9 +144,8 @@ body: |
147144
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
148145
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
149146
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
150-
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
151-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[ZEXT]](s32), [[ZEXT1]]
152-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
147+
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]]
148+
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32)
153149
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
154150
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
155151
%0:_(s32) = COPY $sgpr0
@@ -183,11 +179,9 @@ body: |
183179
; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
184180
; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
185181
; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
186-
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]]
187-
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]]
188-
; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[LSHR]](s32), [[LSHR1]]
189-
; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]]
190-
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
182+
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[AND]], [[AND1]]
183+
; CHECK: [[UMAX1:%[0-9]+]]:sgpr(s32) = G_UMAX [[LSHR]], [[LSHR1]]
184+
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMAX]](s32), [[UMAX1]](s32)
191185
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
192186
%0:_(<2 x s16>) = COPY $sgpr0
193187
%1:_(<2 x s16>) = COPY $sgpr1

0 commit comments

Comments
 (0)