Skip to content

Commit 820df6e

Browse files
iajbar authored and Krzysztof Parzyszek committed
[Hexagon] Lower vector predicate store
This patch lowers the store of a vector predicate of type v128i1.
1 parent 95291a0 commit 820df6e

File tree

4 files changed

+121
-11
lines changed

4 files changed

+121
-11
lines changed

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1681,6 +1681,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
16811681
setOperationAction(ISD::STORE, VT, Custom);
16821682
}
16831683

1684+
setOperationAction(ISD::STORE, MVT::v128i1, Custom);
1685+
16841686
for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
16851687
MVT::v2i32}) {
16861688
setCondCodeAction(ISD::SETNE, VT, Expand);

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,8 @@ namespace HexagonISD {
456456
SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
457457
SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
458458
SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const;
459+
SDValue LowerHvxStore(SDValue Op, SelectionDAG &DAG) const;
460+
SDValue HvxVecPredBitcastComputation(SDValue Op, SelectionDAG &DAG) const;
459461

460462
SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
461463
SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 70 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,17 +1437,12 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
14371437
return T7;
14381438
}
14391439

1440-
SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op,
1441-
SelectionDAG &DAG) const {
1442-
auto *N = Op.getNode();
1443-
EVT VT = N->getValueType(0);
1440+
// This function does the computation needed to bitcast a vector predicate
1441+
// register to a vector of integers.
1442+
SDValue
1443+
HexagonTargetLowering::HvxVecPredBitcastComputation(SDValue Op,
1444+
SelectionDAG &DAG) const {
14441445
const SDLoc &dl(Op);
1445-
SDValue Q0 = N->getOperand(0);
1446-
EVT VTOp = Q0.getNode()->getValueType(0);
1447-
if (!(VT == MVT::i64 || VT == MVT::i32) ||
1448-
!(VTOp == MVT::v64i1 || VTOp == MVT::v32i1)) {
1449-
return Op;
1450-
}
14511446
MVT VecTy;
14521447
int Length;
14531448
if (Subtarget.useHVX64BOps()) {
@@ -1463,7 +1458,7 @@ SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op,
14631458
SDValue InstrC8421 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C8421, DAG);
14641459
// v0 = vand(q0,r0)
14651460
SDValue Vand =
1466-
getInstr(Hexagon::V6_vandqrt, dl, VecTy, {Q0, InstrC8421}, DAG);
1461+
getInstr(Hexagon::V6_vandqrt, dl, VecTy, {Op, InstrC8421}, DAG);
14671462

14681463
// Or the bytes in each word into a single byte: that will form packs
14691464
// of 4 bits of the output.
@@ -1548,6 +1543,22 @@ SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op,
15481543
SDValue Vror2 = getInstr(Hexagon::V6_vror, dl, VecTy, {Vor3, InstrC4}, DAG);
15491544
// v0 = vor(v0,v1)
15501545
SDValue Vor4 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor3, Vror2}, DAG);
1546+
return Vor4;
1547+
}
1548+
1549+
SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op,
1550+
SelectionDAG &DAG) const {
1551+
auto *N = Op.getNode();
1552+
EVT VT = N->getValueType(0);
1553+
const SDLoc &dl(Op);
1554+
SDValue Q0 = N->getOperand(0);
1555+
EVT VTOp = Q0.getNode()->getValueType(0);
1556+
if (!(VT == MVT::i64 || VT == MVT::i32) ||
1557+
!(VTOp == MVT::v64i1 || VTOp == MVT::v32i1))
1558+
return Op;
1559+
1560+
SDValue Vor4 = HvxVecPredBitcastComputation(Q0, DAG);
1561+
15511562
// The output is v.w[8]:v.w[0]
15521563
// r3 = #0
15531564
SDValue C0 = DAG.getTargetConstant(0, dl, MVT::i32);
@@ -1567,6 +1578,53 @@ SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op,
15671578
return Res;
15681579
}
15691580

1581+
/// Custom-lower a store of an HVX vector predicate of type v128i1.
///
/// The predicate is first converted by HvxVecPredBitcastComputation into a
/// vector register. Four 32-bit words are then extracted from that vector
/// (V6_extractw with selector values 0, 32, 64 and 96), combined pairwise into
/// two 64-bit values, and written with two doubleword stores at offsets 0 and
/// 8 from the base pointer of the original store.
///
/// \param Op  The node to lower; expected to be an ISD::STORE.
/// \param DAG The selection DAG in which the replacement nodes are created.
/// \returns The chain produced by the last emitted store, or \p Op unchanged
///          when the node is not a store of a v128i1 value.
SDValue HexagonTargetLowering::LowerHvxStore(SDValue Op,
                                             SelectionDAG &DAG) const {
  // Verify the opcode before looking at any operand: operand 1 is only
  // known to be the stored value once the node is known to be a store.
  if (Op.getOpcode() != ISD::STORE)
    return Op;

  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
  SDValue Q0 = ST->getValue();
  // Check the type of the stored value itself (honouring its result number),
  // not the type of result 0 of the node that defines it.
  if (Q0.getValueType() != MVT::v128i1)
    return Op;

  const SDLoc &dl(Op);
  SDValue Vor4 = HvxVecPredBitcastComputation(Q0, DAG);

  // r = vextract(v0, #Sel): materialize the selector in a scalar register
  // and extract the 32-bit word it designates from the converted vector.
  auto ExtractWord = [&](unsigned Sel) {
    SDValue C = DAG.getTargetConstant(Sel, dl, MVT::i32);
    SDValue InstrC = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C, DAG);
    return getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC}, DAG);
  };

  // Low doubleword: words selected by #32 (high half) and #0 (low half).
  SDValue Word0 = ExtractWord(0);
  SDValue Word1 = ExtractWord(32);
  SDValue Combine0 =
      getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Word1, Word0}, DAG);

  // High doubleword: words selected by #96 (high half) and #64 (low half).
  SDValue Word2 = ExtractWord(64);
  SDValue Word3 = ExtractWord(96);
  SDValue Combine1 =
      getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Word3, Word2}, DAG);

  // memd(base+#8) = Combine1, chained on the original store's input chain.
  SDValue Off8 = DAG.getTargetConstant(8, dl, MVT::i32);
  const SDValue Ops1[] = {ST->getBasePtr(), Off8, Combine1, ST->getChain()};
  SDValue Store1 = getInstr(Hexagon::S2_storerd_io, dl, MVT::Other, Ops1, DAG);

  // memd(base+#0) = Combine0, chained after the first store so that the
  // returned chain depends on both stores.
  SDValue Off0 = DAG.getTargetConstant(0, dl, MVT::i32);
  const SDValue Ops0[] = {ST->getBasePtr(), Off0, Combine0, Store1};
  SDValue Store0 = getInstr(Hexagon::S2_storerd_io, dl, MVT::Other, Ops0, DAG);
  return Store0;
}
1627+
15701628
SDValue
15711629
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
15721630
// Sign- and zero-extends are legal.
@@ -1740,6 +1798,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
17401798
case ISD::SETCC:
17411799
case ISD::INTRINSIC_VOID: return Op;
17421800
case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
1801+
case ISD::STORE: return LowerHvxStore(Op, DAG);
17431802
// Unaligned loads will be handled by the default lowering.
17441803
case ISD::LOAD: return SDValue();
17451804
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; RUN: llc < %s | FileCheck %s
2+
3+
; This test checks that a store of a vector predicate of type v128i1 is lowered
4+
; and that two doubleword stores are generated.
5+
6+
; CHECK-DAG: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}}
7+
; CHECK-DAG: memd(r{{[0-9]+}}+#8) = r{{[0-9]+}}:{{[0-9]+}}
8+
9+
target triple = "hexagon"
10+
11+
define dso_local void @raac_UnpackADIFHeader() local_unnamed_addr #0 {
12+
entry:
13+
br i1 undef, label %cleanup, label %if.end
14+
15+
if.end:
16+
%0 = load i8, i8* undef, align 1
17+
%conv13.i = zext i8 %0 to i32
18+
%trip.count.minus.1216 = add nsw i32 %conv13.i, -1
19+
%broadcast.splatinsert221 = insertelement <128 x i32> undef, i32 %trip.count.minus.1216, i32 0
20+
%broadcast.splat222 = shufflevector <128 x i32> %broadcast.splatinsert221, <128 x i32> undef, <128 x i32> zeroinitializer
21+
%1 = icmp ule <128 x i32> undef, %broadcast.splat222
22+
%wide.masked.load223 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* nonnull undef, i32 1, <128 x i1> %1, <128 x i8> undef)
23+
%2 = lshr <128 x i8> %wide.masked.load223, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
24+
%3 = and <128 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
25+
%4 = zext <128 x i8> %3 to <128 x i32>
26+
%5 = add nsw <128 x i32> undef, %4
27+
%6 = select <128 x i1> %1, <128 x i32> %5, <128 x i32> undef
28+
%bin.rdx225 = add <128 x i32> %6, undef
29+
%bin.rdx227 = add <128 x i32> %bin.rdx225, undef
30+
%bin.rdx229 = add <128 x i32> %bin.rdx227, undef
31+
%bin.rdx231 = add <128 x i32> %bin.rdx229, undef
32+
%bin.rdx233 = add <128 x i32> %bin.rdx231, undef
33+
%bin.rdx235 = add <128 x i32> %bin.rdx233, undef
34+
%bin.rdx237 = add <128 x i32> %bin.rdx235, undef
35+
%7 = extractelement <128 x i32> %bin.rdx237, i32 0
36+
%nChans = getelementptr inbounds i8, i8* null, i32 2160
37+
%8 = bitcast i8* %nChans to i32*
38+
store i32 %7, i32* %8, align 4
39+
br label %cleanup
40+
41+
cleanup:
42+
ret void
43+
}
44+
45+
declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32 immarg, <128 x i1>, <128 x i8>)
46+
47+
attributes #0 = { "target-features"="+hvx-length128b,+hvxv67,+v67,-long-calls" }

0 commit comments

Comments
 (0)