Skip to content

Commit d1b606f

Browse files
author
Simon Moll
committed
[VE] Extract & insert vector element isel
Isel and tests for extract_vector_elt and insert_vector_elt. Reviewed By: kaz7 Differential Revision: https://reviews.llvm.org/D93687
1 parent 2fe625e commit d1b606f

File tree

6 files changed

+589
-13
lines changed

6 files changed

+589
-13
lines changed

llvm/lib/Target/VE/VEISelLowering.cpp

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ bool VETargetLowering::CanLowerReturn(
7474
// Vector value types that initVPUActions() iterates over when registering
// per-type operation actions (BUILD_VECTOR, INSERT/EXTRACT_VECTOR_ELT and the
// VVP-translated opcodes).
static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
7575
MVT::v256f32, MVT::v512f32, MVT::v256f64};
7676

77+
// The 512-element (packed) vector types. Both entries also appear in
// AllVectorVTs; initVPUActions() re-registers INSERT_VECTOR_ELT and
// EXTRACT_VECTOR_ELT as Custom for each of them after the generic loop.
static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
78+
7779
void VETargetLowering::initRegisterClasses() {
7880
// Set up the register classes.
7981
addRegisterClass(MVT::i32, &VE::I32RegClass);
@@ -292,6 +294,8 @@ void VETargetLowering::initSPUActions() {
292294
void VETargetLowering::initVPUActions() {
293295
for (MVT LegalVecVT : AllVectorVTs) {
294296
setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
297+
setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
298+
setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
295299
// Translate all vector instructions with legal element types to VVP_*
296300
// nodes.
297301
// TODO We will custom-widen into VVP_* nodes in the future. While we are
@@ -301,6 +305,11 @@ void VETargetLowering::initVPUActions() {
301305
setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
302306
#include "VVPNodes.def"
303307
}
308+
309+
for (MVT LegalPackedVT : AllPackedVTs) {
310+
setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
311+
setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
312+
}
304313
}
305314

306315
SDValue
@@ -1662,6 +1671,11 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
16621671
case ISD::VAARG:
16631672
return lowerVAARG(Op, DAG);
16641673

1674+
case ISD::INSERT_VECTOR_ELT:
1675+
return lowerINSERT_VECTOR_ELT(Op, DAG);
1676+
case ISD::EXTRACT_VECTOR_ELT:
1677+
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1678+
16651679
#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
16661680
#include "VVPNodes.def"
16671681
return lowerToVVP(Op, DAG);
@@ -2661,3 +2675,100 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
26612675
}
26622676
llvm_unreachable("lowerToVVP called for unexpected SDNode.");
26632677
}
2678+
2679+
/// Custom lowering of EXTRACT_VECTOR_ELT for the packed vector types
/// (v512i32 and v512f32).
///
/// Two 32-bit elements share one 64-bit element of the vector register. The
/// 64-bit element at position (idx >> 1) is read with LVSvr, shifted so that
/// the requested half ends up in the low 32 bits, masked, and returned through
/// the sub_i32 subregister. Elements with an even index are taken from the
/// upper 32 bits (shift amount 32), elements with an odd index from the lower
/// 32 bits (shift amount 0).
///
/// \param Op   The EXTRACT_VECTOR_ELT node; operand 0 is the vector, operand 1
///             the element index.
/// \param DAG  The SelectionDAG in which the replacement nodes are created.
/// \returns    An i32 value, or an f32 bitcast of it when the node's result
///             type is f32.
SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert((VT == MVT::v512i32 || VT == MVT::v512f32) &&
         "Expected a packed vector type!");
  (void)VT; // Only referenced by the assertion; silence NDEBUG builds.

  // Example of codes:
  //   %packed_elt = extractelt %vr, (%idx >> 1)
  //   %shift = ((%idx & 1) ^ 1) << 5
  //   %res = (%packed_elt >> %shift) & 0xffffffff

  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);

  // All index arithmetic below is done in i64; widen a 32-bit index first,
  // exactly as lowerINSERT_VECTOR_ELT does.
  if (Idx.getSimpleValueType() == MVT::i32)
    Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);

  // TODO: optimized implementation using constant values

  // Read the 64-bit element that holds the requested 32-bit half.
  SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
  SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
  SDValue PackedElt =
      SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);

  // Shift amount: 32 for even indices (upper half), 0 for odd indices.
  SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
  SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
  SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
  Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});

  // Move the requested half into the low 32 bits and clear the rest.
  PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
  SDValue Mask = DAG.getConstant(0xFFFFFFFFULL, DL, MVT::i64);
  PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});

  // Narrow the i64 value to i32 through the sub_i32 subregister.
  SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
  SDValue Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                              MVT::i32, PackedElt, SubI32),
                           0);

  MVT ResVT = Op.getSimpleValueType();
  if (ResVT == MVT::f32)
    return DAG.getBitcast(MVT::f32, Result);

  assert(ResVT == MVT::i32 && "Unexpected result type!");
  return Result;
}
2722+
2723+
/// Custom lowering of INSERT_VECTOR_ELT for the packed vector types
/// (v512i32 and v512f32).
///
/// Two 32-bit elements share one 64-bit element of the vector register. The
/// 64-bit element at position (idx >> 1) is read with LVSvr, the half that is
/// being replaced is cleared, the new value is OR-ed in at the matching
/// position, and the combined 64-bit element is written back with LSVrr_v.
///
/// \param Op   The INSERT_VECTOR_ELT node; operand 0 is the vector, operand 1
///             the value to insert (i32 or f32), operand 2 the element index.
/// \param DAG  The SelectionDAG in which the replacement nodes are created.
/// \returns    The updated vector, of the same type as operand 0.
SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert((VT == MVT::v512i32 || VT == MVT::v512f32) &&
         "Expected a packed vector type!");
  // The elements of v512i32 and v512f32 start in the upper bits (0..31) of
  // each 64-bit element. From a C implementation's point of view, placing a
  // value in those "upper bits" requires `val << 32`.
  //
  // Example of codes:
  //   %packed_elt = extractelt %vr, (%idx >> 1)
  //   %shift = ((%idx & 1) ^ 1) << 5
  //   %packed_elt &= 0xffffffff00000000 >> shift
  //   %packed_elt |= (zext %val) << shift
  //   %vr = insertelt %vr, %packed_elt, (%idx >> 1)

  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  // All arithmetic below is done in i64: widen the index and the raw bits of
  // the inserted value.
  if (Idx.getSimpleValueType() == MVT::i32)
    Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
  if (Val.getSimpleValueType() == MVT::f32)
    Val = DAG.getBitcast(MVT::i32, Val);
  assert(Val.getSimpleValueType() == MVT::i32 &&
         "Unexpected type of the inserted value!");
  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);

  // TODO: optimized implementation using constant values

  // Read the 64-bit element that holds the 32-bit half to be replaced.
  SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
  SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
  SDValue PackedElt =
      SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);

  // Shift amount: 32 for even indices (upper half), 0 for odd indices.
  SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
  SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
  SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
  Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});

  // Keep the half that is NOT being replaced ...
  SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000ULL, DL, MVT::i64);
  Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
  PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});

  // ... and merge the new value into the cleared half.
  Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
  PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});

  // Write the combined 64-bit element back into the vector.
  return SDValue(
      DAG.getMachineNode(VE::LSVrr_v, DL, VT, {HalfIdx, PackedElt, Vec}), 0);
}

llvm/lib/Target/VE/VEISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ class VETargetLowering : public TargetLowering {
130130
SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
131131

132132
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
133+
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
134+
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
133135
/// } Custom Lower
134136

135137
/// Replace the results of node with an illegal result

llvm/lib/Target/VE/VEInstrInfo.td

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,6 +1569,17 @@ defm SHMB : SHMm<"shm.b", 0x31, I64>;
15691569
// Pattern Matchings
15701570
//===----------------------------------------------------------------------===//
15711571

1572+
// Basic cast between registers. This is often used in ISel patterns, so make
1573+
// them as OutPatFrag.
1574+
// i2l: place $exp into the sub_i32 subregister of an otherwise undefined
// (IMPLICIT_DEF) i64 value.
def i2l : OutPatFrag<(ops node:$exp),
1575+
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_i32)>;
1576+
// l2i: read the sub_i32 subregister of $exp.
def l2i : OutPatFrag<(ops node:$exp),
1577+
(EXTRACT_SUBREG $exp, sub_i32)>;
1578+
// f2l: place $exp into the sub_f32 subregister of an otherwise undefined
// (IMPLICIT_DEF) i64 value.
def f2l : OutPatFrag<(ops node:$exp),
1579+
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_f32)>;
1580+
// l2f: read the sub_f32 subregister of $exp.
def l2f : OutPatFrag<(ops node:$exp),
1581+
(EXTRACT_SUBREG $exp, sub_f32)>;
1582+
15721583
// Small immediates.
15731584
def : Pat<(i32 simm7:$val), (EXTRACT_SUBREG (ORim (LO7 $val), 0), sub_i32)>;
15741585
def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>;
@@ -1782,9 +1793,6 @@ defm : ATMLDm<atomic_load_16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
17821793
defm : ATMLDm<atomic_load_32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>;
17831794
defm : ATMLDm<atomic_load_64, LDrri, LDrii, LDzri, LDzii>;
17841795

1785-
def i2l : OutPatFrag<(ops node:$exp),
1786-
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_i32)>;
1787-
17881796
// Optimized atomic loads with sext
17891797
multiclass SXATMLDm<SDPatternOperator from, Operand TY,
17901798
SDPatternOperator torri, SDPatternOperator torii,

llvm/lib/Target/VE/VEInstrPatternsVec.td

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,19 @@
1515
// Instruction format superclass
1616
//===----------------------------------------------------------------------===//
1717

18-
multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp, SDNodeXForm ImmCast, int SubRegIdx> {
18+
multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
19+
SDNodeXForm ImmCast, SDNodeXForm SuperRegCast> {
1920
// VBRDil
2021
def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)),
2122
(VBRDil (ImmCast $sy), i32:$vl)>;
2223

2324
// VBRDrl
2425
def : Pat<(v32 (vec_broadcast s32:$sy, i32:$vl)),
25-
(VBRDrl
26-
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, SubRegIdx),
27-
i32:$vl)>;
26+
(VBRDrl (SuperRegCast $sy), i32:$vl)>;
2827
}
2928

30-
defm : vbrd_elem32<v256f32, f32, simm7fp, LO7FP, sub_f32>;
31-
defm : vbrd_elem32<v256i32, i32, simm7, LO7, sub_i32>;
32-
33-
multiclass vbrd_elem64<ValueType v64, ValueType s64, SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
29+
multiclass vbrd_elem64<ValueType v64, ValueType s64,
30+
SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
3431
// VBRDil
3532
def : Pat<(v64 (vec_broadcast (s64 ImmOp:$sy), i32:$vl)),
3633
(VBRDil (ImmCast $sy), i32:$vl)>;
@@ -40,5 +37,55 @@ multiclass vbrd_elem64<ValueType v64, ValueType s64, SDPatternOperator ImmOp, SD
4037
(VBRDrl s64:$sy, i32:$vl)>;
4138
}
4239

43-
defm : vbrd_elem64<v256f64, f64, simm7fp, LO7FP>;
44-
defm : vbrd_elem64<v256i64, i64, simm7, LO7>;
40+
// Selection patterns for extractelt/insertelt on vectors with 32-bit
// elements.
//   v32          - vector value type.
//   s32          - scalar element type.
//   SubRegCast   - applied to the LVS result to produce the s32 value.
//   SuperRegCast - applied to the s32 value before it is passed to LSV.
// NOTE(review): both casts are declared as SDNodeXForm, but the
// instantiations in this file pass OutPatFrag values (l2i/i2l, l2f/f2l) --
// confirm the intended parameter type.
multiclass extract_insert_elem32<ValueType v32, ValueType s32,
41+
SDNodeXForm SubRegCast,
42+
SDNodeXForm SuperRegCast> {
43+
// LVSvi: extract with a uimm7 immediate index (converted with ULO7).
44+
def: Pat<(s32 (extractelt v32:$vec, uimm7:$idx)),
45+
(SubRegCast (LVSvi v32:$vec, (ULO7 $idx)))>;
46+
// LVSvr: extract with an i64 register index.
47+
def: Pat<(s32 (extractelt v32:$vec, i64:$idx)),
48+
(SubRegCast (LVSvr v32:$vec, $idx))>;
49+
50+
// LSVir: insert with a uimm7 immediate index (converted with ULO7).
51+
def: Pat<(v32 (insertelt v32:$vec, s32:$val, uimm7:$idx)),
52+
(LSVir_v (ULO7 $idx), (SuperRegCast $val), $vec)>;
53+
// LSVrr: insert with an i64 register index.
54+
def: Pat<(v32 (insertelt v32:$vec, s32:$val, i64:$idx)),
55+
(LSVrr_v $idx, (SuperRegCast $val), $vec)>;
56+
}
57+
58+
// Selection patterns for extractelt/insertelt on vectors with 64-bit
// elements. Unlike the 32-bit variant, the scalar value is passed to and
// taken from the LVS/LSV instructions directly, without a subregister cast.
//   v64 - vector value type.
//   s64 - scalar element type.
multiclass extract_insert_elem64<ValueType v64, ValueType s64> {
59+
// LVSvi: extract with a uimm7 immediate index (converted with ULO7).
60+
def: Pat<(s64 (extractelt v64:$vec, uimm7:$idx)),
61+
(LVSvi v64:$vec, (ULO7 $idx))>;
62+
// LVSvr: extract with an i64 register index.
63+
def: Pat<(s64 (extractelt v64:$vec, i64:$idx)),
64+
(LVSvr v64:$vec, $idx)>;
65+
66+
// LSVir: insert with a uimm7 immediate index (converted with ULO7).
67+
def: Pat<(v64 (insertelt v64:$vec, s64:$val, uimm7:$idx)),
68+
(LSVir_v (ULO7 $idx), $val, $vec)>;
69+
// LSVrr: insert with an i64 register index.
70+
def: Pat<(v64 (insertelt v64:$vec, s64:$val, i64:$idx)),
71+
(LSVrr_v $idx, $val, $vec)>;
72+
}
73+
74+
// Bundles every pattern group defined for a 32-bit-element vector type:
// the vec_broadcast patterns (vbrd_elem32) and the extract/insert element
// patterns (extract_insert_elem32). All parameters are forwarded unchanged.
// NOTE(review): SubRegCast/SuperRegCast are declared as SDNodeXForm, but the
// instantiations in this file pass OutPatFrag values -- confirm the intended
// parameter type.
multiclass patterns_elem32<ValueType v32, ValueType s32,
75+
SDPatternOperator ImmOp, SDNodeXForm ImmCast,
76+
SDNodeXForm SubRegCast, SDNodeXForm SuperRegCast> {
77+
defm : vbrd_elem32<v32, s32, ImmOp, ImmCast, SuperRegCast>;
78+
defm : extract_insert_elem32<v32, s32, SubRegCast, SuperRegCast>;
79+
}
80+
81+
// Bundles every pattern group defined for a 64-bit-element vector type:
// the vec_broadcast patterns (vbrd_elem64) and the extract/insert element
// patterns (extract_insert_elem64).
multiclass patterns_elem64<ValueType v64, ValueType s64,
82+
SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
83+
defm : vbrd_elem64<v64, s64, ImmOp, ImmCast>;
84+
defm : extract_insert_elem64<v64, s64>;
85+
}
86+
87+
// Instantiate the 32-bit-element patterns for v256i32 and v256f32. The last
// two arguments are the subregister casts used by the extract/insert patterns.
defm : patterns_elem32<v256i32, i32, simm7, LO7, l2i, i2l>;
88+
defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>;
89+
90+
// Instantiate the 64-bit-element patterns for v256i64 and v256f64.
defm : patterns_elem64<v256i64, i64, simm7, LO7>;
91+
defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>;

0 commit comments

Comments
 (0)