Skip to content

Commit adccc0b

Browse files
committed
[X86] Add X86ISD opcodes for the Key Locker AESENC*KL and AESDEC*KL instructions
Instead of emitting MachineSDNodes during lowering, emit X86ISD opcodes. These opcodes are then selected either by tablegen patterns or by custom selection code. Emitting MachineSDNodes during lowering is uncommon, so this makes things more consistent. It also allows selectAddr to be called to perform address matching during instruction selection. I had trouble getting tablegen to accept XMM0-XMM7 as results in an isel pattern for the WIDE instructions, so I had to use custom instruction selection for them.
1 parent 9b85152 commit adccc0b

File tree

6 files changed

+234
-76
lines changed

6 files changed

+234
-76
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2448,6 +2448,14 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
24482448
Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
24492449
Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
24502450
Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2451+
Parent->getOpcode() != X86ISD::AESENC128KL && // Fixme
2452+
Parent->getOpcode() != X86ISD::AESDEC128KL && // Fixme
2453+
Parent->getOpcode() != X86ISD::AESENC256KL && // Fixme
2454+
Parent->getOpcode() != X86ISD::AESDEC256KL && // Fixme
2455+
Parent->getOpcode() != X86ISD::AESENCWIDE128KL && // Fixme
2456+
Parent->getOpcode() != X86ISD::AESDECWIDE128KL && // Fixme
2457+
Parent->getOpcode() != X86ISD::AESENCWIDE256KL && // Fixme
2458+
Parent->getOpcode() != X86ISD::AESDECWIDE256KL && // Fixme
24512459
Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
24522460
Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
24532461
unsigned AddrSpace =
@@ -5725,6 +5733,61 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
57255733
CurDAG->RemoveDeadNode(Node);
57265734
return;
57275735
}
5736+
case X86ISD::AESENCWIDE128KL:
5737+
case X86ISD::AESDECWIDE128KL:
5738+
case X86ISD::AESENCWIDE256KL:
5739+
case X86ISD::AESDECWIDE256KL: {
5740+
unsigned Opcode;
5741+
switch (Node->getOpcode()) {
5742+
default:
5743+
llvm_unreachable("Unexpected opcode!");
5744+
case X86ISD::AESENCWIDE128KL:
5745+
Opcode = X86::AESENCWIDE128KL;
5746+
break;
5747+
case X86ISD::AESDECWIDE128KL:
5748+
Opcode = X86::AESDECWIDE128KL;
5749+
break;
5750+
case X86ISD::AESENCWIDE256KL:
5751+
Opcode = X86::AESENCWIDE256KL;
5752+
break;
5753+
case X86ISD::AESDECWIDE256KL:
5754+
Opcode = X86::AESDECWIDE256KL;
5755+
break;
5756+
}
5757+
5758+
SDValue Chain = Node->getOperand(0);
5759+
SDValue Addr = Node->getOperand(1);
5760+
5761+
SDValue Base, Scale, Index, Disp, Segment;
5762+
if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment))
5763+
break;
5764+
5765+
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2),
5766+
SDValue());
5767+
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3),
5768+
Chain.getValue(1));
5769+
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4),
5770+
Chain.getValue(1));
5771+
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5),
5772+
Chain.getValue(1));
5773+
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6),
5774+
Chain.getValue(1));
5775+
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7),
5776+
Chain.getValue(1));
5777+
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8),
5778+
Chain.getValue(1));
5779+
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9),
5780+
Chain.getValue(1));
5781+
5782+
SDVTList VTs = CurDAG->getVTList(
5783+
{MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64,
5784+
MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other});
5785+
SDNode *Res = CurDAG->getMachineNode(
5786+
Opcode, dl, VTs,
5787+
{Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)});
5788+
ReplaceNode(Node, Res);
5789+
return;
5790+
}
57285791
}
57295792

57305793
SelectCode(Node);

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 35 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -26032,118 +26032,73 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
2603226032
case Intrinsic::x86_aesenc256kl:
2603326033
case Intrinsic::x86_aesdec256kl: {
2603426034
SDLoc DL(Op);
26035-
SDVTList VTs = DAG.getVTList(MVT::v16i8, MVT::Other, MVT::Glue);
26035+
SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::i32, MVT::Other);
2603626036
SDValue Chain = Op.getOperand(0);
2603726037
unsigned Opcode;
2603826038

2603926039
switch (IntNo) {
2604026040
default: llvm_unreachable("Impossible intrinsic");
2604126041
case Intrinsic::x86_aesenc128kl:
26042-
Opcode = X86::AESENC128KL;
26042+
Opcode = X86ISD::AESENC128KL;
2604326043
break;
2604426044
case Intrinsic::x86_aesdec128kl:
26045-
Opcode = X86::AESDEC128KL;
26045+
Opcode = X86ISD::AESDEC128KL;
2604626046
break;
2604726047
case Intrinsic::x86_aesenc256kl:
26048-
Opcode = X86::AESENC256KL;
26048+
Opcode = X86ISD::AESENC256KL;
2604926049
break;
2605026050
case Intrinsic::x86_aesdec256kl:
26051-
Opcode = X86::AESDEC256KL;
26051+
Opcode = X86ISD::AESDEC256KL;
2605226052
break;
2605326053
}
2605426054

26055-
SDValue XMM = Op.getOperand(2);
26056-
SDValue Base = Op.getOperand(3);
26057-
SDValue Index = DAG.getRegister(0, MVT::i32);
26058-
SDValue Scale = DAG.getTargetConstant(1, DL, MVT::i8);
26059-
SDValue Disp = DAG.getTargetConstant(0, DL, MVT::i32);
26060-
SDValue Segment = DAG.getRegister(0, MVT::i32);
26061-
26062-
SDNode *Res = DAG.getMachineNode(Opcode, DL, VTs, {XMM, Base, Scale, Index,
26063-
Disp, Segment, Chain});
26064-
Chain = SDValue(Res, 1);
26065-
SDValue EFLAGS = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32,
26066-
SDValue(Res, 2));
26067-
SDValue ZF = getSETCC(X86::COND_E, EFLAGS.getValue(0), DL, DAG);
26055+
SDValue Operation = DAG.getNode(Opcode, DL, VTs, Chain, Op.getOperand(2),
26056+
Op.getOperand(3));
26057+
SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(1), DL, DAG);
2606826058

2606926059
return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
26070-
{ZF, SDValue(Res, 0), EFLAGS.getValue(1)});
26060+
{ZF, Operation.getValue(0), Operation.getValue(2)});
2607126061
}
2607226062
case Intrinsic::x86_aesencwide128kl:
2607326063
case Intrinsic::x86_aesdecwide128kl:
2607426064
case Intrinsic::x86_aesencwide256kl:
2607526065
case Intrinsic::x86_aesdecwide256kl: {
2607626066
SDLoc DL(Op);
26077-
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
26067+
SDVTList VTs = DAG.getVTList(
26068+
{MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64,
26069+
MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other});
2607826070
SDValue Chain = Op.getOperand(0);
2607926071
unsigned Opcode;
2608026072

2608126073
switch (IntNo) {
2608226074
default: llvm_unreachable("Impossible intrinsic");
2608326075
case Intrinsic::x86_aesencwide128kl:
26084-
Opcode = X86::AESENCWIDE128KL;
26076+
Opcode = X86ISD::AESENCWIDE128KL;
2608526077
break;
2608626078
case Intrinsic::x86_aesdecwide128kl:
26087-
Opcode = X86::AESDECWIDE128KL;
26079+
Opcode = X86ISD::AESDECWIDE128KL;
2608826080
break;
2608926081
case Intrinsic::x86_aesencwide256kl:
26090-
Opcode = X86::AESENCWIDE256KL;
26082+
Opcode = X86ISD::AESENCWIDE256KL;
2609126083
break;
2609226084
case Intrinsic::x86_aesdecwide256kl:
26093-
Opcode = X86::AESDECWIDE256KL;
26085+
Opcode = X86ISD::AESDECWIDE256KL;
2609426086
break;
2609526087
}
2609626088

26097-
SDValue Base = Op.getOperand(2);
26098-
SDValue Index = DAG.getRegister(0, MVT::i32);
26099-
SDValue Scale = DAG.getTargetConstant(1, DL, MVT::i8);
26100-
SDValue Disp = DAG.getTargetConstant(0, DL, MVT::i32);
26101-
SDValue Segment = DAG.getRegister(0, MVT::i32);
26089+
SDValue Operation = DAG.getNode(
26090+
Opcode, DL, VTs,
26091+
{Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
26092+
Op.getOperand(5), Op.getOperand(6), Op.getOperand(7),
26093+
Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)});
26094+
SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(0), DL, DAG);
2610226095

26103-
Chain = DAG.getCopyToReg(Chain, DL, X86::XMM0, Op->getOperand(3),
26104-
SDValue());
26105-
Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM1,
26106-
Op->getOperand(4), Chain.getValue(1));
26107-
Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM2,
26108-
Op->getOperand(5), Chain.getValue(1));
26109-
Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM3,
26110-
Op->getOperand(6), Chain.getValue(1));
26111-
Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM4,
26112-
Op->getOperand(7), Chain.getValue(1));
26113-
Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM5,
26114-
Op->getOperand(8), Chain.getValue(1));
26115-
Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM6,
26116-
Op->getOperand(9), Chain.getValue(1));
26117-
Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM7,
26118-
Op->getOperand(10),Chain.getValue(1));
26119-
26120-
SDNode *Res = DAG.getMachineNode(Opcode, DL, VTs, {Base, Scale, Index,
26121-
Disp, Segment, Chain,
26122-
Chain.getValue(1)});
26123-
26124-
Chain = SDValue(Res, 0);
26125-
SDValue EFLAGS = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32,
26126-
SDValue(Res, 1));
26127-
SDValue ZF = getSETCC(X86::COND_E, EFLAGS.getValue(0), DL, DAG);
26128-
SDValue XMM0 = DAG.getCopyFromReg(EFLAGS.getValue(1), DL, X86::XMM0,
26129-
MVT::v16i8, EFLAGS.getValue(2));
26130-
SDValue XMM1 = DAG.getCopyFromReg(XMM0.getValue(1), DL, X86::XMM1,
26131-
MVT::v16i8, XMM0.getValue(2));
26132-
SDValue XMM2 = DAG.getCopyFromReg(XMM1.getValue(1), DL, X86::XMM2,
26133-
MVT::v16i8, XMM1.getValue(2));
26134-
SDValue XMM3 = DAG.getCopyFromReg(XMM2.getValue(1), DL, X86::XMM3,
26135-
MVT::v16i8, XMM2.getValue(2));
26136-
SDValue XMM4 = DAG.getCopyFromReg(XMM3.getValue(1), DL, X86::XMM4,
26137-
MVT::v16i8, XMM3.getValue(2));
26138-
SDValue XMM5 = DAG.getCopyFromReg(XMM4.getValue(1), DL, X86::XMM5,
26139-
MVT::v16i8, XMM4.getValue(2));
26140-
SDValue XMM6 = DAG.getCopyFromReg(XMM5.getValue(1), DL, X86::XMM6,
26141-
MVT::v16i8, XMM5.getValue(2));
26142-
SDValue XMM7 = DAG.getCopyFromReg(XMM6.getValue(1), DL, X86::XMM7,
26143-
MVT::v16i8, XMM6.getValue(2));
2614426096
return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
26145-
{ZF, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
26146-
XMM7.getValue(1)});
26097+
{ZF, Operation.getValue(1), Operation.getValue(2),
26098+
Operation.getValue(3), Operation.getValue(4),
26099+
Operation.getValue(5), Operation.getValue(6),
26100+
Operation.getValue(7), Operation.getValue(8),
26101+
Operation.getValue(9)});
2614726102
}
2614826103
}
2614926104
return SDValue();
@@ -31167,6 +31122,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3116731122
NODE_NAME_CASE(ENQCMD)
3116831123
NODE_NAME_CASE(ENQCMDS)
3116931124
NODE_NAME_CASE(VP2INTERSECT)
31125+
NODE_NAME_CASE(AESENC128KL)
31126+
NODE_NAME_CASE(AESDEC128KL)
31127+
NODE_NAME_CASE(AESENC256KL)
31128+
NODE_NAME_CASE(AESDEC256KL)
31129+
NODE_NAME_CASE(AESENCWIDE128KL)
31130+
NODE_NAME_CASE(AESDECWIDE128KL)
31131+
NODE_NAME_CASE(AESENCWIDE256KL)
31132+
NODE_NAME_CASE(AESDECWIDE256KL)
3117031133
}
3117131134
return nullptr;
3117231135
#undef NODE_NAME_CASE

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,16 @@ namespace llvm {
713713
// Mwaitx builtin is lowered to this if the base pointer needs saving.
714714
MWAITX_DAG,
715715

716+
// Key locker nodes that produce flags.
717+
AESENC128KL,
718+
AESDEC128KL,
719+
AESENC256KL,
720+
AESDEC256KL,
721+
AESENCWIDE128KL,
722+
AESDECWIDE128KL,
723+
AESENCWIDE256KL,
724+
AESDECWIDE256KL,
725+
716726
/// X86 strict FP compare instructions.
717727
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
718728
STRICT_FCMPS,

llvm/lib/Target/X86/X86InstrInfo.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
135135
def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
136136
SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>;
137137

138+
def SDT_X86AESENCDECKL : SDTypeProfile<2, 2, [SDTCisVT<0, v2i64>,
139+
SDTCisVT<1, i32>,
140+
SDTCisVT<2, v2i64>,
141+
SDTCisPtrTy<3>]>;
142+
138143
def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
139144
[SDNPHasChain,SDNPSideEffect]>;
140145
def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
@@ -331,6 +336,15 @@ def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD,
331336
def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD,
332337
[SDNPHasChain, SDNPSideEffect]>;
333338

339+
def X86aesenc128kl : SDNode<"X86ISD::AESENC128KL", SDT_X86AESENCDECKL,
340+
[SDNPHasChain, SDNPSideEffect]>;
341+
def X86aesdec128kl : SDNode<"X86ISD::AESDEC128KL", SDT_X86AESENCDECKL,
342+
[SDNPHasChain, SDNPSideEffect]>;
343+
def X86aesenc256kl : SDNode<"X86ISD::AESENC256KL", SDT_X86AESENCDECKL,
344+
[SDNPHasChain, SDNPSideEffect]>;
345+
def X86aesdec256kl : SDNode<"X86ISD::AESDEC256KL", SDT_X86AESENCDECKL,
346+
[SDNPHasChain, SDNPSideEffect]>;
347+
334348
//===----------------------------------------------------------------------===//
335349
// X86 Operand Definitions.
336350
//

llvm/lib/Target/X86/X86InstrKL.td

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,24 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in {
3636
let Constraints = "$src1 = $dst",
3737
Defs = [EFLAGS] in {
3838
def AESENC128KL : I<0xDC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
39-
"aesenc128kl\t{$src2, $src1|$src1, $src2}", []>, T8XS;
39+
"aesenc128kl\t{$src2, $src1|$src1, $src2}",
40+
[(set VR128:$dst, EFLAGS,
41+
(X86aesenc128kl VR128:$src1, addr:$src2))]>, T8XS;
4042

4143
def AESDEC128KL : I<0xDD, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
42-
"aesdec128kl\t{$src2, $src1|$src1, $src2}", []>, T8XS;
44+
"aesdec128kl\t{$src2, $src1|$src1, $src2}",
45+
[(set VR128:$dst, EFLAGS,
46+
(X86aesdec128kl VR128:$src1, addr:$src2))]>, T8XS;
4347

4448
def AESENC256KL : I<0xDE, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
45-
"aesenc256kl\t{$src2, $src1|$src1, $src2}", []>, T8XS;
49+
"aesenc256kl\t{$src2, $src1|$src1, $src2}",
50+
[(set VR128:$dst, EFLAGS,
51+
(X86aesenc256kl VR128:$src1, addr:$src2))]>, T8XS;
4652

4753
def AESDEC256KL : I<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
48-
"aesdec256kl\t{$src2, $src1|$src1, $src2}", []>, T8XS;
54+
"aesdec256kl\t{$src2, $src1|$src1, $src2}",
55+
[(set VR128:$dst, EFLAGS,
56+
(X86aesdec256kl VR128:$src1, addr:$src2))]>, T8XS;
4957
}
5058

5159
} // SchedRW, Predicates

0 commit comments

Comments
 (0)