Skip to content

Commit 40381ca

Browse files
committed
update with latest changes from llvm#89217
1 parent 6714741 commit 40381ca

10 files changed

+96
-116
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5508,12 +5508,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
55085508
NODE_NAME_CASE(LDS)
55095509
NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD)
55105510
NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD)
5511-
NODE_NAME_CASE(READLANE)
5512-
NODE_NAME_CASE(READFIRSTLANE)
5513-
NODE_NAME_CASE(WRITELANE)
5514-
NODE_NAME_CASE(PERMLANE16)
5515-
NODE_NAME_CASE(PERMLANEX16)
5516-
NODE_NAME_CASE(PERMLANE64)
55175511
NODE_NAME_CASE(DUMMY_CHAIN)
55185512
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
55195513
NODE_NAME_CASE(LOAD_D16_HI)

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -558,13 +558,6 @@ enum NodeType : unsigned {
558558
FPTRUNC_ROUND_UPWARD,
559559
FPTRUNC_ROUND_DOWNWARD,
560560

561-
READLANE,
562-
READFIRSTLANE,
563-
WRITELANE,
564-
PERMLANE16,
565-
PERMLANEX16,
566-
PERMLANE64,
567-
568561
DUMMY_CHAIN,
569562
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
570563
LOAD_D16_HI,

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -342,34 +342,6 @@ def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
342342

343343
def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
344344

345-
def AMDGPUReadfirstlaneOp : SDTypeProfile<1, 1, [
346-
SDTCisSameAs<0, 1>
347-
]>;
348-
349-
def AMDGPUReadlaneOp : SDTypeProfile<1, 2, [
350-
SDTCisSameAs<0, 1>, SDTCisInt<2>
351-
]>;
352-
353-
def AMDGPUDWritelaneOp : SDTypeProfile<1, 3, [
354-
SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<0, 3>
355-
]>;
356-
357-
def AMDGPUDPermlane16Op : SDTypeProfile<1, 6, [
358-
SDTCisSameAs<0, 1>, // old
359-
SDTCisSameAs<0, 2>, // src0
360-
SDTCisInt<3>, // src1
361-
SDTCisInt<4>, // src2
362-
SDTCisInt<5>, // i1 fi
363-
SDTCisInt<6> // i1 bound_ctrl
364-
]>;
365-
366-
def AMDGPUreadlane_impl : SDNode<"AMDGPUISD::READLANE", AMDGPUReadlaneOp>;
367-
def AMDGPUreadfirstlane_impl : SDNode<"AMDGPUISD::READFIRSTLANE", AMDGPUReadfirstlaneOp>;
368-
def AMDGPUwritelane_impl : SDNode<"AMDGPUISD::WRITELANE", AMDGPUDWritelaneOp>;
369-
def AMDGPUpermlane16_impl : SDNode<"AMDGPUISD::PERMLANE16", AMDGPUDPermlane16Op>;
370-
def AMDGPUpermlanex16_impl : SDNode<"AMDGPUISD::PERMLANEX16", AMDGPUDPermlane16Op>;
371-
def AMDGPUpermlane64_impl : SDNode<"AMDGPUISD::PERMLANE64", AMDGPUReadfirstlaneOp>;
372-
373345
// SI+ export
374346
def AMDGPUExportOp : SDTypeProfile<0, 8, [
375347
SDTCisInt<0>, // i8 tgt
@@ -534,29 +506,3 @@ def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc
534506
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
535507
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
536508
(AMDGPUperm_impl node:$src0, node:$src1, node:$src2)]>;
537-
538-
def AMDGPUreadlane : PatFrags<(ops node:$src0, node:$src1),
539-
[(int_amdgcn_readlane node:$src0, node:$src1),
540-
(AMDGPUreadlane_impl node:$src0, node:$src1)]>;
541-
542-
def AMDGPUreadfirstlane : PatFrags<(ops node:$src),
543-
[(int_amdgcn_readfirstlane node:$src),
544-
(AMDGPUreadfirstlane_impl node:$src)]>;
545-
546-
def AMDGPUwritelane : PatFrags<(ops node:$src0, node:$src1, node:$src2),
547-
[(int_amdgcn_writelane node:$src0, node:$src1, node:$src2),
548-
(AMDGPUwritelane_impl node:$src0, node:$src1, node:$src2)]>;
549-
550-
def AMDGPUpermlane16 : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5),
551-
[(int_amdgcn_permlane16 node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5),
552-
(AMDGPUpermlane16_impl node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5)]>;
553-
554-
def AMDGPUpermlanex16 : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5),
555-
[(int_amdgcn_permlanex16 node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5),
556-
(AMDGPUpermlanex16_impl node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5)]>;
557-
558-
def AMDGPUpermlane64 : PatFrags<(ops node:$src),
559-
[(int_amdgcn_permlane64 node:$src),
560-
(AMDGPUpermlane64_impl node:$src)]>;
561-
562-

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 82 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6095,46 +6095,55 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
60956095
SelectionDAG &DAG) {
60966096
EVT VT = N->getValueType(0);
60976097
unsigned ValSize = VT.getSizeInBits();
6098-
unsigned IntrinsicID = N->getConstantOperandVal(0);
6099-
bool IsPermLane16 = IntrinsicID == Intrinsic::amdgcn_permlane16 ||
6100-
IntrinsicID == Intrinsic::amdgcn_permlanex16;
6101-
SDValue Src0 = N->getOperand(1);
6098+
unsigned IID = N->getConstantOperandVal(0);
6099+
bool IsPermLane16 = IID == Intrinsic::amdgcn_permlane16 ||
6100+
IID == Intrinsic::amdgcn_permlanex16;
61026101
SDLoc SL(N);
61036102
MVT IntVT = MVT::getIntegerVT(ValSize);
61046103

6105-
auto createLaneOp = [&DAG, &SL, N](SDValue Src0, SDValue Src1, SDValue Src2,
6106-
MVT ValueT) -> SDValue {
6107-
switch (unsigned IID = N->getConstantOperandVal(0)) {
6104+
auto createLaneOp = [&DAG, &SL, N, IID](SDValue Src0, SDValue Src1, SDValue Src2,
6105+
MVT ValT) -> SDValue {
6106+
SmallVector<SDValue, 8> Operands;
6107+
switch (IID) {
6108+
case Intrinsic::amdgcn_permlane16:
6109+
case Intrinsic::amdgcn_permlanex16:
6110+
Operands.push_back(N->getOperand(6));
6111+
Operands.push_back(N->getOperand(5));
6112+
Operands.push_back(N->getOperand(4));
6113+
[[fallthrough]];
6114+
case Intrinsic::amdgcn_writelane:
6115+
Operands.push_back(Src2);
6116+
[[fallthrough]];
6117+
case Intrinsic::amdgcn_readlane:
6118+
Operands.push_back(Src1);
6119+
[[fallthrough]];
61086120
case Intrinsic::amdgcn_readfirstlane:
61096121
case Intrinsic::amdgcn_permlane64:
6110-
return DAG.getNode(IID == Intrinsic::amdgcn_readfirstlane
6111-
? AMDGPUISD::READFIRSTLANE
6112-
: AMDGPUISD::PERMLANE64,
6113-
SL, ValueT, {Src0});
6114-
case Intrinsic::amdgcn_readlane:
6115-
return DAG.getNode(AMDGPUISD::READLANE, SL, ValueT, {Src0, Src1});
6116-
case Intrinsic::amdgcn_writelane:
6117-
return DAG.getNode(AMDGPUISD::WRITELANE, SL, ValueT, {Src0, Src1, Src2});
6118-
case Intrinsic::amdgcn_permlane16:
6119-
case Intrinsic::amdgcn_permlanex16: {
6120-
SDValue Src3 = N->getOperand(4);
6121-
SDValue Src4 = N->getOperand(5);
6122-
SDValue Src5 = N->getOperand(6);
6123-
return DAG.getNode(IID == Intrinsic::amdgcn_permlane16
6124-
? AMDGPUISD::PERMLANE16
6125-
: AMDGPUISD::PERMLANEX16,
6126-
SL, ValueT, {Src0, Src1, Src2, Src3, Src4, Src5});
6127-
}
6122+
Operands.push_back(Src0);
6123+
break;
61286124
default:
61296125
llvm_unreachable("unhandled lane op");
61306126
}
6127+
6128+
Operands.push_back(DAG.getTargetConstant(IID, SL, MVT::i32));
6129+
std::reverse(Operands.begin(), Operands.end());
6130+
6131+
if (SDNode *GL = N->getGluedNode()) {
6132+
assert(GL->getOpcode() == ISD::CONVERGENCECTRL_GLUE);
6133+
GL = GL->getOperand(0).getNode();
6134+
Operands.push_back(DAG.getNode(ISD::CONVERGENCECTRL_GLUE, SL, MVT::Glue,
6135+
SDValue(GL, 0)));
6136+
}
6137+
6138+
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, ValT, Operands);
61316139
};
61326140

6141+
SDValue Src0 = N->getOperand(1);
61336142
SDValue Src1, Src2;
6134-
if (IntrinsicID == Intrinsic::amdgcn_readlane ||
6135-
IntrinsicID == Intrinsic::amdgcn_writelane || IsPermLane16) {
6143+
if (IID == Intrinsic::amdgcn_readlane ||
6144+
IID == Intrinsic::amdgcn_writelane || IsPermLane16) {
61366145
Src1 = N->getOperand(2);
6137-
if (IntrinsicID == Intrinsic::amdgcn_writelane || IsPermLane16)
6146+
if (IID == Intrinsic::amdgcn_writelane || IsPermLane16)
61386147
Src2 = N->getOperand(3);
61396148
}
61406149

@@ -6153,7 +6162,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
61536162
SL, MVT::i32);
61546163
}
61556164

6156-
if (IntrinsicID == Intrinsic::amdgcn_writelane) {
6165+
if (IID == Intrinsic::amdgcn_writelane) {
61576166
Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2,
61586167
SL, MVT::i32);
61596168
}
@@ -6165,14 +6174,54 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
61656174

61666175
if (ValSize % 32 != 0)
61676176
return SDValue();
6177+
6178+
auto unrollLaneOp = [&DAG, &SL](SDNode *N) -> SDValue {
6179+
EVT VT = N->getValueType(0);
6180+
unsigned NE = VT.getVectorNumElements();
6181+
EVT EltVT = VT.getVectorElementType();
6182+
SmallVector<SDValue, 8> Scalars;
6183+
unsigned NumOperands = N->getNumOperands();
6184+
SmallVector<SDValue, 4> Operands(NumOperands);
6185+
SDNode *GL = N->getGluedNode();
6186+
6187+
// only handle convergencectrl_glue
6188+
assert(!GL || GL->getOpcode() == ISD::CONVERGENCECTRL_GLUE);
6189+
6190+
for (unsigned i = 0; i != NE; ++i) {
6191+
for (unsigned j = 0, e = GL ? NumOperands - 1 : NumOperands; j != e;
6192+
++j) {
6193+
SDValue Operand = N->getOperand(j);
6194+
EVT OperandVT = Operand.getValueType();
6195+
if (OperandVT.isVector()) {
6196+
// A vector operand; extract a single element.
6197+
EVT OperandEltVT = OperandVT.getVectorElementType();
6198+
Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, OperandEltVT,
6199+
Operand, DAG.getVectorIdxConstant(i, SL));
6200+
} else {
6201+
// A scalar operand; just use it as is.
6202+
Operands[j] = Operand;
6203+
}
6204+
}
6205+
6206+
if (GL)
6207+
Operands[NumOperands - 1] =
6208+
DAG.getNode(ISD::CONVERGENCECTRL_GLUE, SL, MVT::Glue,
6209+
SDValue(GL->getOperand(0).getNode(), 0));
6210+
6211+
Scalars.push_back(DAG.getNode(N->getOpcode(), SL, EltVT, Operands));
6212+
}
6213+
6214+
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NE);
6215+
return DAG.getBuildVector(VecVT, SL, Scalars);
6216+
};
61686217

61696218
if (VT.isVector()) {
61706219
switch (MVT::SimpleValueType EltTy =
61716220
VT.getVectorElementType().getSimpleVT().SimpleTy) {
61726221
case MVT::i32:
61736222
case MVT::f32: {
61746223
SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VT.getSimpleVT());
6175-
return DAG.UnrollVectorOp(LaneOp.getNode());
6224+
return unrollLaneOp(LaneOp.getNode());
61766225
}
61776226
case MVT::i16:
61786227
case MVT::f16:
@@ -6188,7 +6237,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
61886237
Src1SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src1,
61896238
DAG.getConstant(EltIdx, SL, MVT::i32));
61906239

6191-
if (IntrinsicID == Intrinsic::amdgcn_writelane)
6240+
if (IID == Intrinsic::amdgcn_writelane)
61926241
Src2SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src2,
61936242
DAG.getConstant(EltIdx, SL, MVT::i32));
61946243

@@ -6212,11 +6261,11 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
62126261
if (IsPermLane16)
62136262
Src1 = DAG.getBitcast(VecVT, Src1);
62146263

6215-
if (IntrinsicID == Intrinsic::amdgcn_writelane)
6264+
if (IID == Intrinsic::amdgcn_writelane)
62166265
Src2 = DAG.getBitcast(VecVT, Src2);
62176266

62186267
SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VecVT);
6219-
SDValue UnrolledLaneOp = DAG.UnrollVectorOp(LaneOp.getNode());
6268+
SDValue UnrolledLaneOp = unrollLaneOp(LaneOp.getNode());
62206269
return DAG.getBitcast(VT, UnrolledLaneOp);
62216270
}
62226271

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3389,7 +3389,7 @@ def : GCNPat<
33893389
// FIXME: Should also do this for readlane, but tablegen crashes on
33903390
// the ignored src1.
33913391
def : GCNPat<
3392-
(i32 (AMDGPUreadfirstlane (i32 imm:$src))),
3392+
(i32 (int_amdgcn_readfirstlane (i32 imm:$src))),
33933393
(S_MOV_B32 SReg_32:$src)
33943394
>;
33953395

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32", VOP_READFIRSTLANE,
255255
}
256256

257257
foreach vt = Reg32Types.types in {
258-
def : GCNPat<(vt (AMDGPUreadfirstlane (vt VRegOrLdsSrc_32:$src0))),
258+
def : GCNPat<(vt (int_amdgcn_readfirstlane (vt VRegOrLdsSrc_32:$src0))),
259259
(V_READFIRSTLANE_B32 (vt VRegOrLdsSrc_32:$src0))
260260
>;
261261
}
@@ -743,7 +743,7 @@ let SubtargetPredicate = isGFX11Plus in {
743743
} // End SubtargetPredicate = isGFX11Plus
744744

745745
foreach vt = Reg32Types.types in {
746-
def : GCNPat<(AMDGPUpermlane64 (vt VRegSrc_32:$src0)),
746+
def : GCNPat<(int_amdgcn_permlane64 (vt VRegSrc_32:$src0)),
747747
(vt (V_PERMLANE64_B32 (vt VRegSrc_32:$src0)))
748748
>;
749749
}

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -789,11 +789,11 @@ def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, []> {
789789
} // End isConvergent = 1
790790

791791
foreach vt = Reg32Types.types in {
792-
def : GCNPat<(vt (AMDGPUreadlane vt:$src0, i32:$src1)),
792+
def : GCNPat<(vt (int_amdgcn_readlane vt:$src0, i32:$src1)),
793793
(V_READLANE_B32 VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1)
794794
>;
795795

796-
def : GCNPat<(vt (AMDGPUwritelane vt:$src0, i32:$src1, vt:$src2)),
796+
def : GCNPat<(vt (int_amdgcn_writelane vt:$src0, i32:$src1, vt:$src2)),
797797
(V_WRITELANE_B32 SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$src2)
798798
>;
799799
}

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -841,8 +841,8 @@ def gi_opsel_i1timm : GICustomOperandRenderer<"renderOpSelTImm">,
841841

842842
class PermlanePat<SDPatternOperator permlane,
843843
Instruction inst, ValueType vt> : GCNPat<
844-
(permlane vt:$vdst_in, vt:$src0, i32:$src1, i32:$src2,
845-
timm:$fi, timm:$bc),
844+
(vt (permlane vt:$vdst_in, vt:$src0, i32:$src1, i32:$src2,
845+
timm:$fi, timm:$bc)),
846846
(inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc),
847847
SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in)
848848
>;
@@ -867,8 +867,8 @@ let SubtargetPredicate = isGFX10Plus in {
867867
} // End $vdst = $vdst_in, DisableEncoding $vdst_in, IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1
868868

869869
foreach vt = Reg32Types.types in {
870-
def : PermlanePat<AMDGPUpermlane16, V_PERMLANE16_B32_e64, vt>;
871-
def : PermlanePat<AMDGPUpermlanex16, V_PERMLANEX16_B32_e64, vt>;
870+
def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>;
871+
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
872872
}
873873

874874
defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -463,10 +463,9 @@ define void @test_readfirstlane_i16(ptr addrspace(1) %out, i16 %src) {
463463
; CHECK-SDAG: ; %bb.0:
464464
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465465
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
466-
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 0xffff
467-
; CHECK-SDAG-NEXT: v_and_b32_e32 v0, s4, v0
466+
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 0xffff
468467
; CHECK-SDAG-NEXT: ;;#ASMSTART
469-
; CHECK-SDAG-NEXT: ; use v0
468+
; CHECK-SDAG-NEXT: ; use s4
470469
; CHECK-SDAG-NEXT: ;;#ASMEND
471470
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
472471
;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -743,12 +743,11 @@ define void @test_readlane_i16(ptr addrspace(1) %out, i16 %src, i32 %src1) {
743743
; CHECK-SDAG: ; %bb.0:
744744
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745745
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3
746-
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 0xffff
747-
; CHECK-SDAG-NEXT: s_nop 2
746+
; CHECK-SDAG-NEXT: s_nop 3
748747
; CHECK-SDAG-NEXT: v_readlane_b32 s4, v2, s4
749-
; CHECK-SDAG-NEXT: v_and_b32_e32 v0, s4, v0
748+
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 0xffff
750749
; CHECK-SDAG-NEXT: ;;#ASMSTART
751-
; CHECK-SDAG-NEXT: ; use v0
750+
; CHECK-SDAG-NEXT: ; use s4
752751
; CHECK-SDAG-NEXT: ;;#ASMEND
753752
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
754753
;

0 commit comments

Comments
 (0)