Skip to content

Commit a8a4f99

Browse files
iajbar authored and Krzysztof Parzyszek committed
[Hexagon] Lower bitcast of a vector predicate
This patch lowers bitcasts of vector predicates of type v32i1/v64i1 to the scalar integer types i32/i64.
1 parent 9fe769a commit a8a4f99

File tree

5 files changed

+219
-7
lines changed

5 files changed

+219
-7
lines changed

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1694,6 +1694,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
16941694

16951695
// Custom-lower bitcasts from i8 to v8i1.
16961696
setOperationAction(ISD::BITCAST, MVT::i8, Custom);
1697+
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
1698+
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
16971699
setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
16981700
setOperationAction(ISD::VSELECT, MVT::v4i8, Custom);
16991701
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
@@ -2266,13 +2268,16 @@ HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
22662268
const SDLoc &dl(Op);
22672269

22682270
// Handle conversion from i8 to v8i1.
2269-
if (ResTy == MVT::v8i1) {
2270-
SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
2271-
SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2272-
return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2271+
if (InpTy == MVT::i8) {
2272+
if (ResTy == MVT::v8i1) {
2273+
SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
2274+
SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2275+
return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2276+
}
2277+
return SDValue();
22732278
}
22742279

2275-
return SDValue();
2280+
return Op;
22762281
}
22772282

22782283
bool

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ namespace HexagonISD {
445445
SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;
446446
SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const;
447447
SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const;
448-
448+
SDValue LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const;
449449
SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const;
450450
SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const;
451451
SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1437,6 +1437,136 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
14371437
return T7;
14381438
}
14391439

1440+
// Lower a BITCAST whose operand is an HVX vector predicate (v64i1 or v32i1)
// and whose result is a scalar integer (i64 or i32). Any other combination
// of types is returned unchanged.
//
// Outline of the emitted sequence:
//   1. Expand the predicate into a vector of 32-bit words with
//      vand(q, 0x08040201).
//   2. Or the four bytes of every word into its lowest byte, giving one
//      4-bit pack per word, and clear the remaining bytes.
//   3. Shift every word left by the amount taken from a constant table
//      (0, 4, ..., 28, repeated).
//   4. Or-reduce the words with three rotate-and-or rounds.
//   5. Extract the word at byte offset 0 and, for an i64 result, also the
//      word at byte offset 32, and combine them.
//
// NOTE(review): VecTy and Length are assigned only inside the two
// Subtarget.useHVX*Ops() checks; if neither check succeeded they would be
// read uninitialized. Presumably one of the two always holds when this
// function is reached -- confirm, or add an assert/llvm_unreachable.
SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op,
1441+
SelectionDAG &DAG) const {
1442+
auto *N = Op.getNode();
1443+
EVT VT = N->getValueType(0);
1444+
const SDLoc &dl(Op);
1445+
SDValue Q0 = N->getOperand(0);
1446+
EVT VTOp = Q0.getNode()->getValueType(0);
1447+
// Only (v64i1 | v32i1) -> (i64 | i32) is handled here.
// NOTE(review): the check does not require the operand and result widths
// to match (e.g. v32i1 -> i64 passes the guard) -- confirm this is intended.
if (!(VT == MVT::i64 || VT == MVT::i32) ||
1448+
!(VTOp == MVT::v64i1 || VTOp == MVT::v32i1)) {
1449+
return Op;
1450+
}
1451+
// VecTy: the vector-of-i32 type used for all intermediate values.
MVT VecTy;
1452+
// Length: number of 8-entry repetitions of the shift table built below
// (8 * Length equals the element count of VecTy).
int Length;
1453+
if (Subtarget.useHVX64BOps()) {
1454+
VecTy = MVT::getVectorVT(MVT::i32, 16);
1455+
Length = 2;
1456+
}
1457+
if (Subtarget.useHVX128BOps()) {
1458+
VecTy = MVT::getVectorVT(MVT::i32, 32);
1459+
Length = 4;
1460+
}
1461+
// r0 = ##0x08040201 // Pre-rotated bits per 4 consecutive bytes.
1462+
SDValue C8421 = DAG.getTargetConstant(0x08040201, dl, MVT::i32);
1463+
SDValue InstrC8421 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C8421, DAG);
1464+
// v0 = vand(q0,r0)
1465+
SDValue Vand =
1466+
getInstr(Hexagon::V6_vandqrt, dl, VecTy, {Q0, InstrC8421}, DAG);
1467+
1468+
// Or the bytes in each word into a single byte: that will form packs
1469+
// of 4 bits of the output.
1470+
// v1 = valign(v0,v0,#2)
1471+
SDValue C2 = DAG.getTargetConstant(2, dl, MVT::i32);
1472+
SDValue Valign =
1473+
getInstr(Hexagon::V6_valignbi, dl, VecTy, {Vand, Vand, C2}, DAG);
1474+
// v0 = vor(v0,v1)
1475+
SDValue Vor = getInstr(Hexagon::V6_vor, dl, VecTy, {Vand, Valign}, DAG);
1476+
// v1 = valign(v0,v0,#1)
1477+
SDValue C1 = DAG.getTargetConstant(1, dl, MVT::i32);
1478+
SDValue Valign1 =
1479+
getInstr(Hexagon::V6_valignbi, dl, VecTy, {Vor, Vor, C1}, DAG);
1480+
// v0 = vor(v0,v1)
1481+
SDValue Vor1 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor, Valign1}, DAG);
1482+
1483+
// Clear all the bytes per word except the lowest one.
1484+
// r0 = #0xff
1485+
SDValue Cff = DAG.getTargetConstant(0xff, dl, MVT::i32);
1486+
SDValue InstrCff = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, Cff, DAG);
1487+
// v1 = vsplat(r0)
1488+
SDValue Vsplat = getInstr(Hexagon::V6_lvsplatw, dl, VecTy, InstrCff, DAG);
1489+
// v0 = vand(v0,v1)
1490+
SDValue Vand1 = getInstr(Hexagon::V6_vand, dl, VecTy, {Vor1, Vsplat}, DAG);
1491+
1492+
// Shift each word left by 4 * (its index modulo 8) to position the 4-bit
// packs for oring.
1493+
// Words 0..7 are ored into the low half and words 8..15 into the high half
// of the 64-bit output (see the extraction of v.w[0] and v.w[8] below).
1494+
// r0 = ##.Lshifts
1495+
// .Lshifts:
1496+
// .word 0
1497+
// .word 4
1498+
// .word 8
1499+
// .word 12
1500+
// .word 16
1501+
// .word 20
1502+
// .word 24
1503+
// .word 28
1504+
// .word 0
1505+
// .word 4
1506+
// .word 8
1507+
// .word 12
1508+
// .word 16
1509+
// .word 20
1510+
// .word 24
1511+
// .word 28
1512+
// v1 = vmem(r0+#0)
1513+
// Build the shift table as a constant vector: the 8-entry pattern
// 0, 4, ..., 28 is appended Length times.
SmallVector<SDValue, 32> Elems;
1514+
for (int i = 0; i < Length; ++i) {
1515+
Elems.push_back(DAG.getConstant(0, dl, MVT::i32));
1516+
Elems.push_back(DAG.getConstant(4, dl, MVT::i32));
1517+
Elems.push_back(DAG.getConstant(8, dl, MVT::i32));
1518+
Elems.push_back(DAG.getConstant(12, dl, MVT::i32));
1519+
Elems.push_back(DAG.getConstant(16, dl, MVT::i32));
1520+
Elems.push_back(DAG.getConstant(20, dl, MVT::i32));
1521+
Elems.push_back(DAG.getConstant(24, dl, MVT::i32));
1522+
Elems.push_back(DAG.getConstant(28, dl, MVT::i32));
1523+
}
1524+
1525+
SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
1526+
// v0.w = vasl(v0.w,v1.w)
1527+
SDValue Vasl = getInstr(Hexagon::V6_vaslwv, dl, VecTy, {Vand1, BV}, DAG);
1528+
1529+
// 3 rounds of oring: rotate by 16, then 8, then 4, oring the rotated
// vector into the running value after each rotation.
// NOTE(review): the rotation amounts are the same constants for both HVX
// lengths; the "HwLen/N" annotations below match the 64-byte case.
1530+
// r0 = #16 // HwLen/4
1531+
SDValue C16 = DAG.getTargetConstant(16, dl, MVT::i32);
1532+
SDValue InstrC16 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C16, DAG);
1533+
// v1 = vror(v0,r0)
1534+
SDValue Vror = getInstr(Hexagon::V6_vror, dl, VecTy, {Vasl, InstrC16}, DAG);
1535+
// v0 = vor(v0,v1)
1536+
SDValue Vor2 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vasl, Vror}, DAG);
1537+
// r0 = #8 // HwLen/8
1538+
SDValue C8 = DAG.getTargetConstant(8, dl, MVT::i32);
1539+
SDValue InstrC8 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C8, DAG);
1540+
// v1 = vror(v0,r0)
1541+
SDValue Vror1 = getInstr(Hexagon::V6_vror, dl, VecTy, {Vor2, InstrC8}, DAG);
1542+
// v0 = vor(v0,v1)
1543+
SDValue Vor3 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor2, Vror1}, DAG);
1544+
// r0 = #4 // HwLen/16
1545+
SDValue C4 = DAG.getTargetConstant(4, dl, MVT::i32);
1546+
SDValue InstrC4 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C4, DAG);
1547+
// v1 = vror(v0,r0)
1548+
SDValue Vror2 = getInstr(Hexagon::V6_vror, dl, VecTy, {Vor3, InstrC4}, DAG);
1549+
// v0 = vor(v0,v1)
1550+
SDValue Vor4 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor3, Vror2}, DAG);
1551+
// The output is v.w[8]:v.w[0]
1552+
// r3 = #0
1553+
SDValue C0 = DAG.getTargetConstant(0, dl, MVT::i32);
1554+
SDValue InstrC0 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C0, DAG);
1555+
// r0 = vextract(v0,r3)
1556+
SDValue Res =
1557+
getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC0}, DAG);
1558+
// For an i64 result, also fetch the word at offset 32 and pair it with the
// word already extracted from offset 0.
if (VT == MVT::i64) {
1559+
// r3 = #32
1560+
SDValue C32 = DAG.getTargetConstant(32, dl, MVT::i32);
1561+
SDValue InstrC32 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C32, DAG);
1562+
// r1 = vextract(v0,r3)
1563+
SDValue Vextract =
1564+
getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC32}, DAG);
// Operand order is {word from offset 32, word from offset 0}, matching
// the "v.w[8]:v.w[0]" layout noted above.
1565+
Res = getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Vextract, Res}, DAG);
1566+
}
1567+
return Res;
1568+
}
1569+
14401570
SDValue
14411571
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
14421572
// Sign- and zero-extends are legal.
@@ -1595,7 +1725,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
15951725
case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
15961726
case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
15971727
case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
1598-
1728+
case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
15991729
case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
16001730
case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
16011731
case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; RUN: llc -march=hexagon < %s | FileCheck %s
2+
3+
; CHECK-LABEL: danny:
4+
; CHECK: vand
5+
; Compares two <64 x i8> vectors for equality and returns the resulting
; <64 x i1> mask bitcast to an i64.
define i64 @danny(<64 x i8> %a0, <64 x i8> %a1) #0 {
6+
%v0 = icmp eq <64 x i8> %a0, %a1
7+
%v1 = bitcast <64 x i1> %v0 to i64
8+
ret i64 %v1
9+
}
10+
11+
; CHECK-LABEL: sammy:
12+
; CHECK: vand
13+
; Compares two <32 x i16> vectors for equality and returns the resulting
; <32 x i1> mask bitcast to an i32.
define i32 @sammy(<32 x i16> %a0, <32 x i16> %a1) #0 {
14+
%v0 = icmp eq <32 x i16> %a0, %a1
15+
%v1 = bitcast <32 x i1> %v0 to i32
16+
ret i32 %v1
17+
}
18+
19+
; The <16 x i1> to i16 case below still doesn't work, so it is left commented out.
20+
; define i16 @kirby(<16 x i32> %a0, <16 x i32> %a1) #0 {
21+
; %v0 = icmp eq <16 x i32> %a0, %a1
22+
; %v1 = bitcast <16 x i1> %v0 to i16
23+
; ret i16 %v1
24+
; }
25+
26+
attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+v66,+hvx,+hvxv66,+hvx-length64b" }
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; RUN: llc -march=hexagon < %s | FileCheck %s
2+
3+
; Test that LLVM does not assert and that a bitcast of v64i1 to i64 is lowered.
4+
5+
; CHECK: v[[REG1:[0-9]+]] = valign(v{{[0-9]+}},v{{[0-9]+}},#2)
6+
; CHECK: v[[REG2:[0-9]+]] = vor(v{{[0-9]+}},v[[REG1]])
7+
; CHECK: v[[REG3:[0-9]+]] = valign(v[[REG2]],v[[REG2]],#1)
8+
; CHECK: v[[REG4:[0-9]+]] = vor(v{{[0-9]+}},v[[REG3]])
9+
; CHECK: v[[REG5:[0-9]+]] = vand(v[[REG4]],v{{[0-9]+}})
10+
; CHECK: v{{[0-9]+}}.w = vasl(v[[REG5]].w,v{{[0-9]+}}.w)
11+
12+
target triple = "hexagon"
13+
14+
define dso_local void @fun() local_unnamed_addr #0 {
15+
entry:
16+
br i1 undef, label %cleanup, label %if.end
17+
18+
if.end:
19+
%0 = load i8, i8* undef, align 1
20+
%conv13.i = zext i8 %0 to i32
21+
%trip.count.minus.1216 = add nsw i32 %conv13.i, -1
22+
%broadcast.splatinsert221 = insertelement <64 x i32> undef, i32 %trip.count.minus.1216, i32 0
23+
%broadcast.splat222 = shufflevector <64 x i32> %broadcast.splatinsert221, <64 x i32> undef, <64 x i32> zeroinitializer
24+
%1 = icmp ule <64 x i32> undef, %broadcast.splat222
25+
%wide.masked.load223 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* nonnull undef, i32 1, <64 x i1> %1, <64 x i8> undef)
26+
%2 = lshr <64 x i8> %wide.masked.load223, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
27+
%3 = and <64 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
28+
%4 = zext <64 x i8> %3 to <64 x i32>
29+
%5 = add nsw <64 x i32> undef, %4
30+
%6 = select <64 x i1> %1, <64 x i32> %5, <64 x i32> undef
31+
%bin.rdx225 = add <64 x i32> %6, undef
32+
%bin.rdx227 = add <64 x i32> %bin.rdx225, undef
33+
%bin.rdx229 = add <64 x i32> %bin.rdx227, undef
34+
%bin.rdx231 = add <64 x i32> %bin.rdx229, undef
35+
%bin.rdx233 = add <64 x i32> %bin.rdx231, undef
36+
%bin.rdx235 = add <64 x i32> %bin.rdx233, undef
37+
%bin.rdx237 = add <64 x i32> %bin.rdx235, undef
38+
%7 = extractelement <64 x i32> %bin.rdx237, i32 0
39+
%nChans = getelementptr inbounds i8, i8* null, i32 2160
40+
%8 = bitcast i8* %nChans to i32*
41+
store i32 %7, i32* %8, align 4
42+
br label %cleanup
43+
44+
cleanup:
45+
ret void
46+
}
47+
48+
; Function Attrs: argmemonly nounwind readonly willreturn
49+
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)
50+
51+
attributes #0 = { "target-features"="+hvx-length64b,+hvxv67,+v67,-long-calls" }

0 commit comments

Comments
 (0)