Skip to content

Commit bcaa095

Browse files
author
git apple-llvm automerger
committed
Merge commit '76cbe62262a3' from llvm.org/main into next
2 parents 9af3710 + 76cbe62 commit bcaa095

26 files changed

+5704
-4160
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,22 +355,32 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
355355

356356
auto const &ST = MF.getSubtarget<GCNSubtarget>();
357357

358-
unsigned ReturnOpc =
359-
IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;
358+
unsigned ReturnOpc = 0;
359+
if (IsShader)
360+
ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG;
361+
else if (CC == CallingConv::AMDGPU_Gfx)
362+
ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx;
363+
else
364+
ReturnOpc = AMDGPU::S_SETPC_B64_return;
360365

361366
auto Ret = B.buildInstrNoInsert(ReturnOpc);
362367
Register ReturnAddrVReg;
363368
if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
364369
ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
365370
Ret.addUse(ReturnAddrVReg);
371+
} else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
372+
ReturnAddrVReg =
373+
MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass);
374+
Ret.addUse(ReturnAddrVReg);
366375
}
367376

368377
if (!FLI.CanLowerReturn)
369378
insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
370379
else if (!lowerReturnVal(B, Val, VRegs, Ret))
371380
return false;
372381

373-
if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
382+
if (ReturnOpc == AMDGPU::S_SETPC_B64_return ||
383+
ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
374384
const SIRegisterInfo *TRI = ST.getRegisterInfo();
375385
Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
376386
&AMDGPU::SGPR_64RegClass);

llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@ def CC_SI_Gfx : CallingConv<[
2020
// 0-3 are reserved for the stack buffer descriptor
2121
// 30-31 are reserved for the return address
2222
// 32 is reserved for the stack pointer
23+
// 33 is reserved for the frame pointer
24+
// 34 is reserved for the base pointer
2325
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
2426
SGPR4, SGPR5, SGPR6, SGPR7,
2527
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
2628
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
27-
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29,
29+
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29
2830
]>>>,
2931

3032
CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
@@ -41,17 +43,6 @@ def RetCC_SI_Gfx : CallingConv<[
4143
CCIfType<[i1], CCPromoteToType<i32>>,
4244
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
4345

44-
// 0-3 are reserved for the stack buffer descriptor
45-
// 32 is reserved for the stack pointer
46-
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
47-
SGPR4, SGPR5, SGPR6, SGPR7,
48-
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
49-
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
50-
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
51-
SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
52-
SGPR40, SGPR41, SGPR42, SGPR43
53-
]>>>,
54-
5546
CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
5647
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
5748
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
@@ -165,6 +156,14 @@ def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
165156
(sequence "SGPR%u", 32, 105)
166157
>;
167158

159+
def CSR_AMDGPU_SI_Gfx_SGPRs_4_29 : CalleeSavedRegs<
160+
(sequence "SGPR%u", 4, 29)
161+
>;
162+
163+
def CSR_AMDGPU_SI_Gfx_SGPRs_64_105 : CalleeSavedRegs<
164+
(sequence "SGPR%u", 64, 105)
165+
>;
166+
168167
// Just to get the regmask, not for calling convention purposes.
169168
def CSR_AMDGPU_AllVGPRs : CalleeSavedRegs<
170169
(sequence "VGPR%u", 0, 255)
@@ -190,6 +189,14 @@ def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs<
190189
(add CSR_AMDGPU_HighRegs, CSR_AMDGPU_AGPRs_32_255)
191190
>;
192191

192+
def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
193+
(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
194+
>;
195+
196+
def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs<
197+
(add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs_32_255)
198+
>;
199+
193200
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;
194201

195202
// Calling convention for leaf functions

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4352,6 +4352,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
43524352
NODE_NAME_CASE(TC_RETURN)
43534353
NODE_NAME_CASE(TRAP)
43544354
NODE_NAME_CASE(RET_FLAG)
4355+
NODE_NAME_CASE(RET_GFX_FLAG)
43554356
NODE_NAME_CASE(RETURN_TO_EPILOG)
43564357
NODE_NAME_CASE(ENDPGM)
43574358
NODE_NAME_CASE(DWORDADDR)

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ namespace AMDGPUISD {
343343
enum NodeType : unsigned {
344344
// AMDIL ISD Opcodes
345345
FIRST_NUMBER = ISD::BUILTIN_OP_END,
346-
UMUL, // 32bit unsigned multiplication
346+
UMUL, // 32bit unsigned multiplication
347347
BRANCH_COND,
348348
// End AMDIL ISD Opcodes
349349

@@ -366,6 +366,9 @@ enum NodeType : unsigned {
366366
// Return with values from a non-entry function.
367367
RET_FLAG,
368368

369+
// Return with values from a non-entry function (AMDGPU_Gfx CC).
370+
RET_GFX_FLAG,
371+
369372
DWORDADDR,
370373
FRACT,
371374

@@ -422,10 +425,10 @@ enum NodeType : unsigned {
422425
DOT4,
423426
CARRY,
424427
BORROW,
425-
BFE_U32, // Extract range of bits with zero extension to 32-bits.
426-
BFE_I32, // Extract range of bits with sign extension to 32-bits.
427-
BFI, // (src0 & src1) | (~src0 & src2)
428-
BFM, // Insert a range of bits into a 32-bit word.
428+
BFE_U32, // Extract range of bits with zero extension to 32-bits.
429+
BFE_I32, // Extract range of bits with sign extension to 32-bits.
430+
BFI, // (src0 & src1) | (~src0 & src2)
431+
BFM, // Insert a range of bits into a 32-bit word.
429432
FFBH_U32, // ctlz with -1 if input is zero.
430433
FFBH_I32,
431434
FFBL_B32, // cttz with -1 if input is zero.
@@ -534,7 +537,6 @@ enum NodeType : unsigned {
534537
LAST_AMDGPU_ISD_NUMBER
535538
};
536539

537-
538540
} // End namespace AMDGPUISD
539541

540542
} // End namespace llvm

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,10 @@ def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPt
359359
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
360360
>;
361361

362+
def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
363+
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
364+
>;
365+
362366

363367
//===----------------------------------------------------------------------===//
364368
// Intrinsic/Custom node compatibility PatFrags

llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
120120
// FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
121121
// need to select it to the subtarget specific version, and there's no way to
122122
// do that with a single pseudo source operation.
123-
if (Opcode == AMDGPU::S_SETPC_B64_return)
123+
if (Opcode == AMDGPU::S_SETPC_B64_return ||
124+
Opcode == AMDGPU::S_SETPC_B64_return_gfx)
124125
Opcode = AMDGPU::S_SETPC_B64;
125126
else if (Opcode == AMDGPU::SI_CALL) {
126127
// SI_CALL is just S_SWAPPC_B64 with an additional operand to track the

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2616,9 +2616,12 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
26162616
SDValue ReturnAddrReg = CreateLiveInRegister(
26172617
DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
26182618

2619-
SDValue ReturnAddrVirtualReg = DAG.getRegister(
2620-
MF.getRegInfo().createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass),
2621-
MVT::i64);
2619+
SDValue ReturnAddrVirtualReg =
2620+
DAG.getRegister(MF.getRegInfo().createVirtualRegister(
2621+
CallConv != CallingConv::AMDGPU_Gfx
2622+
? &AMDGPU::CCR_SGPR_64RegClass
2623+
: &AMDGPU::Gfx_CCR_SGPR_64RegClass),
2624+
MVT::i64);
26222625
Chain =
26232626
DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
26242627
Flag = Chain.getValue(1);
@@ -2681,8 +2684,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
26812684
RetOps.push_back(Flag);
26822685

26832686
unsigned Opc = AMDGPUISD::ENDPGM;
2684-
if (!IsWaveEnd)
2685-
Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
2687+
if (!IsWaveEnd) {
2688+
if (IsShader)
2689+
Opc = AMDGPUISD::RETURN_TO_EPILOG;
2690+
else if (CallConv == CallingConv::AMDGPU_Gfx)
2691+
Opc = AMDGPUISD::RET_GFX_FLAG;
2692+
else
2693+
Opc = AMDGPUISD::RET_FLAG;
2694+
}
2695+
26862696
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
26872697
}
26882698

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,6 +963,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
963963
// with knowledge of the called routines.
964964
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
965965
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
966+
MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx ||
966967
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
967968
Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
968969
}

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -360,10 +360,13 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
360360
case CallingConv::C:
361361
case CallingConv::Fast:
362362
case CallingConv::Cold:
363-
case CallingConv::AMDGPU_Gfx:
364363
return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
365364
? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList
366365
: CSR_AMDGPU_HighRegs_SaveList;
366+
case CallingConv::AMDGPU_Gfx:
367+
return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
368+
? CSR_AMDGPU_SI_Gfx_With_AGPRs_SaveList
369+
: CSR_AMDGPU_SI_Gfx_SaveList;
367370
default: {
368371
// Dummy to not crash RegisterClassInfo.
369372
static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
@@ -383,10 +386,13 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
383386
case CallingConv::C:
384387
case CallingConv::Fast:
385388
case CallingConv::Cold:
386-
case CallingConv::AMDGPU_Gfx:
387389
return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
388390
? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask
389391
: CSR_AMDGPU_HighRegs_RegMask;
392+
case CallingConv::AMDGPU_Gfx:
393+
return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
394+
? CSR_AMDGPU_SI_Gfx_With_AGPRs_RegMask
395+
: CSR_AMDGPU_SI_Gfx_RegMask;
390396
default:
391397
return nullptr;
392398
}

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,14 @@ def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
691691
let AllocationPriority = SGPR_64.AllocationPriority;
692692
}
693693

694+
// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
695+
def Gfx_CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
696+
(add (trunc (shl SGPR_64, 15), 1), // s[30:31]
697+
(trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[62:63]
698+
let CopyCost = SGPR_64.CopyCost;
699+
let AllocationPriority = SGPR_64.AllocationPriority;
700+
}
701+
694702
def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
695703
(add TTMP_64Regs)> {
696704
let isAllocatable = 0;

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
265265
let isReturn = 1 in {
266266
// Define variant marked as return rather than branch.
267267
def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>;
268+
def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>;
268269
}
269270
} // End isTerminator = 1, isBarrier = 1
270271

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
1818
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
1919
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
2020
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
21-
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
21+
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
2222
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
23-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
24-
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]]
23+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
24+
; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
2525
call amdgpu_gfx void @external_gfx_void_func_void()
2626
ret void
2727
}
@@ -39,10 +39,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
3939
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
4040
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
4141
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
42-
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
42+
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
4343
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
44-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
45-
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
44+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]]
45+
; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
4646
call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
4747
ret void
4848
}
@@ -60,10 +60,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
6060
; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
6161
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
6262
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
63-
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
63+
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
6464
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
65-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
66-
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
65+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]]
66+
; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
6767
call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
6868
ret void
6969
}
@@ -88,10 +88,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
8888
; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32)
8989
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
9090
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
91-
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
91+
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
9292
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
93-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
94-
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]]
93+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
94+
; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
9595
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
9696
%val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
9797
call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
@@ -118,10 +118,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
118118
; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
119119
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
120120
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
121-
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
121+
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
122122
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
123-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
124-
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]]
123+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
124+
; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
125125
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
126126
%val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
127127
call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)

0 commit comments

Comments
 (0)