Skip to content

Commit eef92f2

Browse files
committed
AMDGPU: Remove custom node for exports
I'm mildly worried that marking the intrinsic with IntrWriteMem could allow exp/exp_done to be reordered. The exp_compr source type is illegal on SI and has to be hacked around, so that case is manually selected during lowering.
1 parent 25e9938 commit eef92f2

File tree

7 files changed

+55
-67
lines changed

7 files changed

+55
-67
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,7 @@ def int_amdgcn_exp : Intrinsic <[], [
11561156
llvm_i1_ty, // done
11571157
llvm_i1_ty // vm
11581158
],
1159-
[ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly]
1159+
[ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrWriteMem, IntrInaccessibleMemOnly]
11601160
>;
11611161

11621162
// exp with compr bit set.
@@ -1167,7 +1167,7 @@ def int_amdgcn_exp_compr : Intrinsic <[], [
11671167
LLVMMatchType<0>, // src1
11681168
llvm_i1_ty, // done
11691169
llvm_i1_ty], // vm
1170-
[ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly]
1170+
[ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrWriteMem, IntrInaccessibleMemOnly]
11711171
>;
11721172

11731173
def int_amdgcn_buffer_wbinvl1_sc :

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4298,8 +4298,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
42984298
NODE_NAME_CASE(MAD_U64_U32)
42994299
NODE_NAME_CASE(PERM)
43004300
NODE_NAME_CASE(TEXTURE_FETCH)
4301-
NODE_NAME_CASE(EXPORT)
4302-
NODE_NAME_CASE(EXPORT_DONE)
43034301
NODE_NAME_CASE(R600_EXPORT)
43044302
NODE_NAME_CASE(CONST_ADDRESS)
43054303
NODE_NAME_CASE(REGISTER_LOAD)

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -433,8 +433,6 @@ enum NodeType : unsigned {
433433
MUL_LOHI_U24,
434434
PERM,
435435
TEXTURE_FETCH,
436-
EXPORT, // exp on SI+
437-
EXPORT_DONE, // exp on SI+ with done bit set
438436
R600_EXPORT,
439437
CONST_ADDRESS,
440438
REGISTER_LOAD,

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -358,12 +358,6 @@ def AMDGPUExportOp : SDTypeProfile<0, 8, [
358358

359359
]>;
360360

361-
def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
362-
[SDNPHasChain, SDNPMayStore]>;
363-
364-
def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
365-
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
366-
367361

368362
def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
369363

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 12 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6782,52 +6782,29 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
67826782
MachineFunction &MF = DAG.getMachineFunction();
67836783

67846784
switch (IntrinsicID) {
6785-
case Intrinsic::amdgcn_exp: {
6786-
const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
6787-
const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
6788-
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(8));
6789-
const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(9));
6790-
6791-
const SDValue Ops[] = {
6792-
Chain,
6793-
DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
6794-
DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
6795-
Op.getOperand(4), // src0
6796-
Op.getOperand(5), // src1
6797-
Op.getOperand(6), // src2
6798-
Op.getOperand(7), // src3
6799-
DAG.getTargetConstant(0, DL, MVT::i1), // compr
6800-
DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
6801-
};
6802-
6803-
unsigned Opc = Done->isNullValue() ?
6804-
AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
6805-
return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
6806-
}
68076785
case Intrinsic::amdgcn_exp_compr: {
6808-
const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
6809-
const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
68106786
SDValue Src0 = Op.getOperand(4);
68116787
SDValue Src1 = Op.getOperand(5);
6812-
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
6813-
const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(7));
6788+
// Hack around illegal type on SI by directly selecting it.
6789+
if (isTypeLegal(Src0.getValueType()))
6790+
return SDValue();
68146791

6792+
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
68156793
SDValue Undef = DAG.getUNDEF(MVT::f32);
68166794
const SDValue Ops[] = {
6817-
Chain,
6818-
DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
6819-
DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
6820-
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0),
6821-
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1),
6795+
Op.getOperand(2), // tgt
6796+
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), // src0
6797+
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), // src1
68226798
Undef, // src2
68236799
Undef, // src3
6800+
Op.getOperand(7), // vm
68246801
DAG.getTargetConstant(1, DL, MVT::i1), // compr
6825-
DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
6802+
Op.getOperand(3), // en
6803+
Op.getOperand(0) // Chain
68266804
};
68276805

6828-
unsigned Opc = Done->isNullValue() ?
6829-
AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
6830-
return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
6806+
unsigned Opc = Done->isNullValue() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
6807+
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
68316808
}
68326809
case Intrinsic::amdgcn_s_barrier: {
68336810
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,7 +1101,7 @@ def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
11011101

11021102
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
11031103

1104-
def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
1104+
def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
11051105

11061106
}
11071107

@@ -1380,24 +1380,21 @@ class SIMCInstr <string pseudo, int subtarget> {
13801380
// EXP classes
13811381
//===----------------------------------------------------------------------===//
13821382

1383-
class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
1383+
class EXP_Helper<bit done> : EXPCommon<
13841384
(outs),
13851385
(ins exp_tgt:$tgt,
13861386
ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
1387-
exp_vm:$vm, exp_compr:$compr, i8imm:$en),
1388-
"exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
1389-
[(node (i8 timm:$tgt), (i8 timm:$en),
1390-
f32:$src0, f32:$src1, f32:$src2, f32:$src3,
1391-
(i1 timm:$compr), (i1 timm:$vm))]> {
1387+
exp_vm:$vm, exp_compr:$compr, i32imm:$en),
1388+
"exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> {
13921389
let AsmMatchConverter = "cvtExp";
13931390
}
13941391

13951392
// Split EXP instruction into EXP and EXP_DONE so we can set
13961393
// mayLoad for done=1.
1397-
multiclass EXP_m<bit done, SDPatternOperator node> {
1394+
multiclass EXP_m<bit done> {
13981395
let mayLoad = done, DisableWQM = 1 in {
13991396
let isPseudo = 1, isCodeGenOnly = 1 in {
1400-
def "" : EXP_Helper<done, node>,
1397+
def "" : EXP_Helper<done>,
14011398
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
14021399
}
14031400

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,41 @@ include "BUFInstructions.td"
2424
// EXP Instructions
2525
//===----------------------------------------------------------------------===//
2626

27-
defm EXP : EXP_m<0, AMDGPUexport>;
28-
defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;
27+
defm EXP : EXP_m<0>;
28+
defm EXP_DONE : EXP_m<1>;
29+
30+
// FIXME: GlobalISel successfully imports this pattern, but fails to
31+
// select because the i1 done_val does a type check on done_val, which
32+
// only works on register operands.
33+
class ExpPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
34+
(int_amdgcn_exp timm:$tgt, timm:$en,
35+
(vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
36+
(vt ExpSrc2:$src2), (vt ExpSrc3:$src3),
37+
done_val, timm:$vm),
38+
(Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
39+
ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en)
40+
>;
41+
42+
class ExpComprPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
43+
(int_amdgcn_exp_compr timm:$tgt, timm:$en,
44+
(vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
45+
done_val, timm:$vm),
46+
(Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
47+
(IMPLICIT_DEF), (IMPLICIT_DEF), timm:$vm, 1, timm:$en)
48+
>;
49+
50+
// FIXME: The generated DAG matcher seems to have strange behavior
51+
// with a 1-bit literal to match, so use a -1 for checking a true
52+
// 1-bit value.
53+
def : ExpPattern<i32, EXP, 0>;
54+
def : ExpPattern<i32, EXP_DONE, -1>;
55+
def : ExpPattern<f32, EXP, 0>;
56+
def : ExpPattern<f32, EXP_DONE, -1>;
57+
58+
def : ExpComprPattern<v2i16, EXP, 0>;
59+
def : ExpComprPattern<v2i16, EXP_DONE, -1>;
60+
def : ExpComprPattern<v2f16, EXP, 0>;
61+
def : ExpComprPattern<v2f16, EXP_DONE, -1>;
2962

3063
//===----------------------------------------------------------------------===//
3164
// VINTRP Instructions
@@ -1782,15 +1815,6 @@ def : GCNPat <
17821815
SRCMODS.NONE, $src2, $clamp, $omod)
17831816
>;
17841817

1785-
// Allow integer inputs
1786-
class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPat<
1787-
(node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)),
1788-
(Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en)
1789-
>;
1790-
1791-
def : ExpPattern<AMDGPUexport, i32, EXP>;
1792-
def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
1793-
17941818
// COPY is workaround tablegen bug from multiple outputs
17951819
// from S_LSHL_B32's multiple outputs from implicit scc def.
17961820
def : GCNPat <

0 commit comments

Comments
 (0)