Skip to content

Commit 585448f

Browse files
AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts and sext-inreg
Uniform S16 shifts have to be extended to S32 using the appropriate extend before lowering to an S32 instruction. Uniform packed V2S16 shifts are lowered to SGPR S32 instructions; the other option is to use VALU packed V2S16 instructions and ReadAnyLane. For uniform S32 and S64, and for divergent S16, S32, S64 and V2S16, there are instructions available.
1 parent ea57c82 commit 585448f

13 files changed

+311
-151
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
310310
// Opcodes that support pretty much all combinations of reg banks and LLTs
311311
// (except S1). There is no point in writing rules for them.
312312
if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||
313-
Opc == AMDGPU::G_MERGE_VALUES) {
313+
Opc == AMDGPU::G_MERGE_VALUES || Opc == G_BITCAST) {
314314
RBLHelper.applyMappingTrivial(*MI);
315315
continue;
316316
}

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
#include "AMDGPURegBankLegalizeHelper.h"
1515
#include "AMDGPUGlobalISelUtils.h"
1616
#include "AMDGPUInstrInfo.h"
17+
#include "AMDGPURegBankLegalizeRules.h"
1718
#include "AMDGPURegisterBankInfo.h"
1819
#include "GCNSubtarget.h"
1920
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
2021
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
2122
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
23+
#include "llvm/CodeGen/MachineInstr.h"
2224
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
2325
#include "llvm/IR/IntrinsicsAMDGPU.h"
2426
#include "llvm/Support/AMDGPUAddrSpace.h"
@@ -166,6 +168,59 @@ void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
166168
MI.eraseFromParent();
167169
}
168170

171+
// Splits a packed 32-bit value into its low and high 16-bit halves, each
// zero-extended to 32 bits.
//
// Reg is bitcast to SgprRB_S32 (defined outside this view; presumably the
// SGPR-bank s32 type), so it must be a 32-bit wide value. Within this file it
// is called from lowerUnpack on the operands of a packed shift.
//
// Returns {Lo, Hi}: Lo holds bits [15:0] of Reg, Hi holds bits [31:16], both
// with their upper 16 bits cleared.
std::pair<Register, Register> RegBankLegalizeHelper::unpackZExt(Register Reg) {
172+
// Reinterpret the packed value as one 32-bit scalar.
auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
173+
auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
174+
// Low half: mask away the upper 16 bits.
auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
175+
// High half: a logical shift right by 16 fills the upper bits with zeros.
auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
176+
return {Lo.getReg(0), Hi.getReg(0)};
177+
}
178+
179+
// Splits a packed 32-bit value into its low and high 16-bit halves, each
// sign-extended to 32 bits.
//
// Reg is bitcast to SgprRB_S32 (defined outside this view; presumably the
// SGPR-bank s32 type), so it must be a 32-bit wide value.
//
// Returns {Lo, Hi}: Lo is bits [15:0] of Reg sign-extended from bit 15, Hi is
// bits [31:16] of Reg sign-extended from bit 31.
std::pair<Register, Register> RegBankLegalizeHelper::unpackSExt(Register Reg) {
180+
// Reinterpret the packed value as one 32-bit scalar.
auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
181+
// Low half: sign-extend in place from the low 16 bits.
auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
182+
// High half: an arithmetic shift right by 16 replicates the sign bit.
auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
183+
return {Lo.getReg(0), Hi.getReg(0)};
184+
}
185+
186+
// Splits a packed 32-bit value into its low and high 16-bit halves, widened
// to 32 bits without defining the upper bits of the low half ("any-extend").
//
// Reg is bitcast to SgprRB_S32 (defined outside this view; presumably the
// SGPR-bank s32 type), so it must be a 32-bit wide value.
//
// Returns {Lo, Hi}: Lo is the whole bitcast value, so only its bits [15:0]
// are meaningful and bits [31:16] still contain the high half. Hi is bits
// [31:16] of Reg moved down to [15:0] by a logical shift.
std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
187+
// Reinterpret the packed value as one 32-bit scalar.
auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
188+
// Low half: no masking is done; callers must ignore the upper 16 bits.
auto Lo = PackedS32;
189+
auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
190+
return {Lo.getReg(0), Hi.getReg(0)};
191+
}
192+
193+
// Lowers a packed shift (G_SHL, G_LSHR or G_ASHR) into two SgprRB_S32 shifts,
// one per 16-bit half, and repacks the results.
//
// Operand 1 (value) and operand 2 (shift amount) are each split into a
// low/high pair with the extension that matches the opcode:
//   G_SHL  -> unpackAExt, G_LSHR -> unpackZExt, G_ASHR -> unpackSExt.
// The two 32-bit results are combined into operand 0 with a truncating
// build-vector and MI is erased. Any other opcode hits llvm_unreachable.
void RegBankLegalizeHelper::lowerUnpack(MachineInstr &MI) {
194+
Register Lo, Hi;
195+
switch (MI.getOpcode()) {
196+
case AMDGPU::G_SHL: {
197+
// The upper bits of the value do not affect the low 16 bits of a left
// shift result, so the value is any-extended.
auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
198+
// NOTE(review): Amt0 is the unmasked packed register, so its bits [31:16]
// hold the high lane's shift amount. Presumably this relies on the S32
// shift only consuming the low bits of the amount - confirm.
auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
199+
Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
200+
Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
201+
break;
202+
}
203+
case AMDGPU::G_LSHR: {
204+
// Zero-extend so that zeros are shifted into the low 16 bits.
auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
205+
auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
206+
Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
207+
Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
208+
break;
209+
}
210+
case AMDGPU::G_ASHR: {
211+
// Sign-extend so that copies of the 16-bit sign bit are shifted in.
auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
212+
auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
213+
Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
214+
Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
215+
break;
216+
}
217+
default:
218+
llvm_unreachable("Unpack lowering not implemented");
219+
}
220+
// Repack: each 32-bit result is truncated into one element of the original
// destination register.
B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
221+
MI.eraseFromParent();
222+
}
223+
169224
static bool isSignedBFE(MachineInstr &MI) {
170225
if (isa<GIntrinsic>(MI)) {
171226
if (MI.getOperand(1).getIntrinsicID() == Intrinsic::amdgcn_sbfe)
@@ -303,6 +358,33 @@ void RegBankLegalizeHelper::lowerSplitTo32Sel(MachineInstr &MI) {
303358
MI.eraseFromParent();
304359
}
305360

361+
// Lowers a sign-extend-in-register on a wide value into operations on its
// VgprRB_S32 pieces.
//
// Operand 1 is unmerged into 32-bit pieces, of which piece 0 is used as the
// low half and piece 1 as the high half. Operand 2 is the immediate bit width
// (Amt) to sign-extend from. The new low and high halves are merged into
// operand 0 and MI is erased.
//   Amt <= 32: the sign bit lives in the low half; the high half becomes
//              32 copies of it.
//   Amt >  32: the low half is unchanged; the high half is sign-extended
//              from bit (Amt - 32).
void RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &MI) {
362+
auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
363+
int Amt = MI.getOperand(2).getImm();
364+
Register Lo, Hi;
365+
// Legend for the diagrams below, written as Hi|Lo: 's' is the sign bit,
// '?' marks bits that sign-extension overwrites, 'x' marks bits it keeps.
// Hi|Lo: s sign bit, ?/x bits changed/not changed by sign-extend
366+
if (Amt <= 32) {
367+
// NOTE(review): the frozen low half feeds both the new Lo and the shift
// that produces Hi; presumably the freeze keeps the two consistent when the
// input is undef/poison - confirm.
auto Freeze = B.buildFreeze(VgprRB_S32, Op1.getReg(0));
368+
if (Amt == 32) {
369+
// Hi|Lo: ????????|sxxxxxxx -> ssssssss|sxxxxxxx
370+
Lo = Freeze.getReg(0);
371+
} else {
372+
// Hi|Lo: ????????|???sxxxx -> ssssssss|ssssxxxx
373+
Lo = B.buildSExtInReg(VgprRB_S32, Freeze, Amt).getReg(0);
374+
}
375+
376+
// Arithmetic shift right by 31 broadcasts Lo's sign bit across all of Hi.
auto SignExtCst = B.buildConstant(SgprRB_S32, 31);
377+
Hi = B.buildAShr(VgprRB_S32, Lo, SignExtCst).getReg(0);
378+
} else {
379+
// Hi|Lo: ?????sxx|xxxxxxxx -> ssssssxx|xxxxxxxx
380+
Lo = Op1.getReg(0);
381+
Hi = B.buildSExtInReg(VgprRB_S32, Op1.getReg(1), Amt - 32).getReg(0);
382+
}
383+
384+
B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
385+
MI.eraseFromParent();
386+
}
387+
306388
void RegBankLegalizeHelper::lower(MachineInstr &MI,
307389
const RegBankLLTMapping &Mapping,
308390
SmallSet<Register, 4> &WaterfallSgprs) {
@@ -325,6 +407,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
325407
MI.eraseFromParent();
326408
return;
327409
}
410+
case Unpack:
411+
return lowerUnpack(MI);
328412
case Ext32To64: {
329413
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
330414
MachineInstrBuilder Hi;
@@ -391,6 +475,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
391475
return lowerSplitTo32(MI);
392476
case SplitTo32Sel:
393477
return lowerSplitTo32Sel(MI);
478+
case SplitTo32SExtInReg:
479+
return lowerSplitTo32SExtInReg(MI);
394480
case SplitLoad: {
395481
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
396482
unsigned Size = DstTy.getSizeInBits();
@@ -480,6 +566,13 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
480566
case SgprP5:
481567
case VgprP5:
482568
return LLT::pointer(5, 32);
569+
case SgprV2S16:
570+
case VgprV2S16:
571+
case UniInVgprV2S16:
572+
return LLT::fixed_vector(2, 16);
573+
case SgprV2S32:
574+
case VgprV2S32:
575+
return LLT::fixed_vector(2, 32);
483576
case SgprV4S32:
484577
case VgprV4S32:
485578
case UniInVgprV4S32:
@@ -553,6 +646,8 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
553646
case SgprP3:
554647
case SgprP4:
555648
case SgprP5:
649+
case SgprV2S16:
650+
case SgprV2S32:
556651
case SgprV4S32:
557652
case SgprB32:
558653
case SgprB64:
@@ -562,6 +657,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
562657
case SgprB512:
563658
case UniInVcc:
564659
case UniInVgprS32:
660+
case UniInVgprV2S16:
565661
case UniInVgprV4S32:
566662
case UniInVgprB32:
567663
case UniInVgprB64:
@@ -583,6 +679,8 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
583679
case VgprP3:
584680
case VgprP4:
585681
case VgprP5:
682+
case VgprV2S16:
683+
case VgprV2S32:
586684
case VgprV4S32:
587685
case VgprB32:
588686
case VgprB64:
@@ -620,6 +718,8 @@ void RegBankLegalizeHelper::applyMappingDst(
620718
case SgprP3:
621719
case SgprP4:
622720
case SgprP5:
721+
case SgprV2S16:
722+
case SgprV2S32:
623723
case SgprV4S32:
624724
case Vgpr16:
625725
case Vgpr32:
@@ -629,6 +729,8 @@ void RegBankLegalizeHelper::applyMappingDst(
629729
case VgprP3:
630730
case VgprP4:
631731
case VgprP5:
732+
case VgprV2S16:
733+
case VgprV2S32:
632734
case VgprV4S32: {
633735
assert(Ty == getTyFromID(MethodIDs[OpIdx]));
634736
assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
@@ -663,6 +765,7 @@ void RegBankLegalizeHelper::applyMappingDst(
663765
break;
664766
}
665767
case UniInVgprS32:
768+
case UniInVgprV2S16:
666769
case UniInVgprV4S32: {
667770
assert(Ty == getTyFromID(MethodIDs[OpIdx]));
668771
assert(RB == SgprRB);
@@ -736,6 +839,8 @@ void RegBankLegalizeHelper::applyMappingSrc(
736839
case SgprP3:
737840
case SgprP4:
738841
case SgprP5:
842+
case SgprV2S16:
843+
case SgprV2S32:
739844
case SgprV4S32: {
740845
assert(Ty == getTyFromID(MethodIDs[i]));
741846
assert(RB == getRegBankFromID(MethodIDs[i]));
@@ -761,6 +866,8 @@ void RegBankLegalizeHelper::applyMappingSrc(
761866
case VgprP3:
762867
case VgprP4:
763868
case VgprP5:
869+
case VgprV2S16:
870+
case VgprV2S32:
764871
case VgprV4S32: {
765872
assert(Ty == getTyFromID(MethodIDs[i]));
766873
if (RB != VgprRB) {

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,15 @@ class RegBankLegalizeHelper {
111111
SmallSet<Register, 4> &SgprWaterfallOperandRegs);
112112

113113
void lowerVccExtToSel(MachineInstr &MI);
114+
std::pair<Register, Register> unpackZExt(Register Reg);
115+
std::pair<Register, Register> unpackSExt(Register Reg);
116+
std::pair<Register, Register> unpackAExt(Register Reg);
117+
void lowerUnpack(MachineInstr &MI);
114118
void lowerV_BFE(MachineInstr &MI);
115119
void lowerS_BFE(MachineInstr &MI);
116120
void lowerSplitTo32(MachineInstr &MI);
117121
void lowerSplitTo32Sel(MachineInstr &MI);
122+
void lowerSplitTo32SExtInReg(MachineInstr &MI);
118123
};
119124

120125
} // end namespace AMDGPU

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
6060
return MRI.getType(Reg) == LLT::pointer(4, 64);
6161
case P5:
6262
return MRI.getType(Reg) == LLT::pointer(5, 32);
63+
case V2S32:
64+
return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
6365
case V4S32:
6466
return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
6567
case B32:
@@ -92,6 +94,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
9294
return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
9395
case UniP5:
9496
return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
97+
case UniV2S16:
98+
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
9599
case UniB32:
96100
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
97101
case UniB64:
@@ -122,6 +126,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
122126
return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
123127
case DivP5:
124128
return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
129+
case DivV2S16:
130+
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
125131
case DivB32:
126132
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
127133
case DivB64:
@@ -435,7 +441,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
435441
MachineRegisterInfo &_MRI)
436442
: ST(&_ST), MRI(&_MRI) {
437443

438-
addRulesForGOpcs({G_ADD}, Standard)
444+
addRulesForGOpcs({G_ADD, G_SUB}, Standard)
439445
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
440446
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
441447

@@ -452,11 +458,36 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
452458
.Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
453459

454460
addRulesForGOpcs({G_SHL}, Standard)
461+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
462+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
463+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, Unpack})
464+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
465+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
466+
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
455467
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
468+
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
469+
470+
addRulesForGOpcs({G_LSHR}, Standard)
471+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
472+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
473+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, Unpack})
474+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
475+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
456476
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
477+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
457478
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
458479

459-
addRulesForGOpcs({G_LSHR}, Standard).Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}});
480+
addRulesForGOpcs({G_ASHR}, Standard)
481+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
482+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
483+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, Unpack})
484+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
485+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
486+
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
487+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
488+
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
489+
490+
addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
460491

461492
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
462493
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
@@ -515,6 +546,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
515546
.Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
516547
.Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
517548
.Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
549+
.Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
550+
.Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
518551
// This is non-trivial. VgprToVccCopy is done using compare instruction.
519552
.Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
520553
.Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
@@ -550,6 +583,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
550583
.Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
551584
.Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
552585

586+
addRulesForGOpcs({G_SEXT_INREG})
587+
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
588+
.Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
589+
.Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
590+
.Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});
591+
553592
bool hasUnalignedLoads = ST->getGeneration() >= AMDGPUSubtarget::GFX12;
554593
bool hasSMRDSmall = ST->hasScalarSubwordLoads();
555594

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ enum UniformityLLTOpPredicateID {
7575
V3S32,
7676
V4S32,
7777

78+
UniV2S16,
79+
80+
DivV2S16,
81+
7882
// B types
7983
B32,
8084
B64,
@@ -117,7 +121,9 @@ enum RegBankLLTMappingApplyID {
117121
SgprP3,
118122
SgprP4,
119123
SgprP5,
124+
SgprV2S16,
120125
SgprV4S32,
126+
SgprV2S32,
121127
SgprB32,
122128
SgprB64,
123129
SgprB96,
@@ -134,6 +140,8 @@ enum RegBankLLTMappingApplyID {
134140
VgprP3,
135141
VgprP4,
136142
VgprP5,
143+
VgprV2S16,
144+
VgprV2S32,
137145
VgprB32,
138146
VgprB64,
139147
VgprB96,
@@ -145,6 +153,7 @@ enum RegBankLLTMappingApplyID {
145153
// Dst only modifiers: read-any-lane and truncs
146154
UniInVcc,
147155
UniInVgprS32,
156+
UniInVgprV2S16,
148157
UniInVgprV4S32,
149158
UniInVgprB32,
150159
UniInVgprB64,
@@ -173,11 +182,13 @@ enum LoweringMethodID {
173182
DoNotLower,
174183
VccExtToSel,
175184
UniExtToSel,
185+
Unpack,
176186
S_BFE,
177187
V_BFE,
178188
VgprToVccCopy,
179189
SplitTo32,
180190
SplitTo32Sel,
191+
SplitTo32SExtInReg,
181192
Ext32To64,
182193
UniCstExt,
183194
SplitLoad,

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
2+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
77

88
define i8 @v_ashr_i8(i8 %value, i8 %amount) {
99
; GFX6-LABEL: v_ashr_i8:

0 commit comments

Comments
 (0)