@@ -1437,6 +1437,136 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1437
1437
return T7;
1438
1438
}
1439
1439
1440
+ SDValue HexagonTargetLowering::LowerHvxBitcast (SDValue Op,
1441
+ SelectionDAG &DAG) const {
1442
+ auto *N = Op.getNode ();
1443
+ EVT VT = N->getValueType (0 );
1444
+ const SDLoc &dl (Op);
1445
+ SDValue Q0 = N->getOperand (0 );
1446
+ EVT VTOp = Q0.getNode ()->getValueType (0 );
1447
+ if (!(VT == MVT::i64 || VT == MVT::i32 ) ||
1448
+ !(VTOp == MVT::v64i1 || VTOp == MVT::v32i1)) {
1449
+ return Op;
1450
+ }
1451
+ MVT VecTy;
1452
+ int Length;
1453
+ if (Subtarget.useHVX64BOps ()) {
1454
+ VecTy = MVT::getVectorVT (MVT::i32 , 16 );
1455
+ Length = 2 ;
1456
+ }
1457
+ if (Subtarget.useHVX128BOps ()) {
1458
+ VecTy = MVT::getVectorVT (MVT::i32 , 32 );
1459
+ Length = 4 ;
1460
+ }
1461
+ // r0 = ##0x08040201 // Pre-rotated bits per 4 consecutive bytes.
1462
+ SDValue C8421 = DAG.getTargetConstant (0x08040201 , dl, MVT::i32 );
1463
+ SDValue InstrC8421 = getInstr (Hexagon::A2_tfrsi, dl, MVT::i32 , C8421, DAG);
1464
+ // v0 = vand(q0,r0)
1465
+ SDValue Vand =
1466
+ getInstr (Hexagon::V6_vandqrt, dl, VecTy, {Q0, InstrC8421}, DAG);
1467
+
1468
+ // Or the bytes in each word into a single byte: that will form packs
1469
+ // of 4 bits of the output.
1470
+ // v1 = valign(v0,v0,#2)
1471
+ SDValue C2 = DAG.getTargetConstant (2 , dl, MVT::i32 );
1472
+ SDValue Valign =
1473
+ getInstr (Hexagon::V6_valignbi, dl, VecTy, {Vand, Vand, C2}, DAG);
1474
+ // v0 = vor(v0,v1)
1475
+ SDValue Vor = getInstr (Hexagon::V6_vor, dl, VecTy, {Vand, Valign}, DAG);
1476
+ // v1 = valign(v0,v0,#1)
1477
+ SDValue C1 = DAG.getTargetConstant (1 , dl, MVT::i32 );
1478
+ SDValue Valign1 =
1479
+ getInstr (Hexagon::V6_valignbi, dl, VecTy, {Vor, Vor, C1}, DAG);
1480
+ // v0 = vor(v0,v1)
1481
+ SDValue Vor1 = getInstr (Hexagon::V6_vor, dl, VecTy, {Vor, Valign1}, DAG);
1482
+
1483
+ // Clear all the bytes per word except the lowest one.
1484
+ // r0 = #0xff
1485
+ SDValue Cff = DAG.getTargetConstant (0xff , dl, MVT::i32 );
1486
+ SDValue InstrCff = getInstr (Hexagon::A2_tfrsi, dl, MVT::i32 , Cff, DAG);
1487
+ // v1 = vsplat(r0)
1488
+ SDValue Vsplat = getInstr (Hexagon::V6_lvsplatw, dl, VecTy, InstrCff, DAG);
1489
+ // v0 = vand(v0,v1)
1490
+ SDValue Vand1 = getInstr (Hexagon::V6_vand, dl, VecTy, {Vor1, Vsplat}, DAG);
1491
+
1492
+ // Shift each word left by its index to position the 4-bit packs for oring.
1493
+ // The words 0..8 and 16..31 need to be ored to form the 64-bit output.
1494
+ // r0 = ##.Lshifts
1495
+ // .Lshifts:
1496
+ // .word 0
1497
+ // .word 4
1498
+ // .word 8
1499
+ // .word 12
1500
+ // .word 16
1501
+ // .word 20
1502
+ // .word 24
1503
+ // .word 28
1504
+ // .word 0
1505
+ // .word 4
1506
+ // .word 8
1507
+ // .word 12
1508
+ // .word 16
1509
+ // .word 20
1510
+ // .word 24
1511
+ // .word 28
1512
+ // v1 = vmem(r0+#0)
1513
+ SmallVector<SDValue, 32 > Elems;
1514
+ for (int i = 0 ; i < Length; ++i) {
1515
+ Elems.push_back (DAG.getConstant (0 , dl, MVT::i32 ));
1516
+ Elems.push_back (DAG.getConstant (4 , dl, MVT::i32 ));
1517
+ Elems.push_back (DAG.getConstant (8 , dl, MVT::i32 ));
1518
+ Elems.push_back (DAG.getConstant (12 , dl, MVT::i32 ));
1519
+ Elems.push_back (DAG.getConstant (16 , dl, MVT::i32 ));
1520
+ Elems.push_back (DAG.getConstant (20 , dl, MVT::i32 ));
1521
+ Elems.push_back (DAG.getConstant (24 , dl, MVT::i32 ));
1522
+ Elems.push_back (DAG.getConstant (28 , dl, MVT::i32 ));
1523
+ }
1524
+
1525
+ SDValue BV = DAG.getBuildVector (VecTy, dl, Elems);
1526
+ // v0.w = vasl(v0.w,v1.w)
1527
+ SDValue Vasl = getInstr (Hexagon::V6_vaslwv, dl, VecTy, {Vand1, BV}, DAG);
1528
+
1529
+ // 3 rounds of oring.
1530
+ // r0 = #16 // HwLen/4
1531
+ SDValue C16 = DAG.getTargetConstant (16 , dl, MVT::i32 );
1532
+ SDValue InstrC16 = getInstr (Hexagon::A2_tfrsi, dl, MVT::i32 , C16, DAG);
1533
+ // v1 = vror(v0,r0)
1534
+ SDValue Vror = getInstr (Hexagon::V6_vror, dl, VecTy, {Vasl, InstrC16}, DAG);
1535
+ // v0 = vor(v0,v1)
1536
+ SDValue Vor2 = getInstr (Hexagon::V6_vor, dl, VecTy, {Vasl, Vror}, DAG);
1537
+ // r0 = #8 // HwLen/8
1538
+ SDValue C8 = DAG.getTargetConstant (8 , dl, MVT::i32 );
1539
+ SDValue InstrC8 = getInstr (Hexagon::A2_tfrsi, dl, MVT::i32 , C8, DAG);
1540
+ // v1 = vror(v0,r0)
1541
+ SDValue Vror1 = getInstr (Hexagon::V6_vror, dl, VecTy, {Vor2, InstrC8}, DAG);
1542
+ // v0 = vor(v0,v1)
1543
+ SDValue Vor3 = getInstr (Hexagon::V6_vor, dl, VecTy, {Vor2, Vror1}, DAG);
1544
+ // r0 = #4 // HwLen/16
1545
+ SDValue C4 = DAG.getTargetConstant (4 , dl, MVT::i32 );
1546
+ SDValue InstrC4 = getInstr (Hexagon::A2_tfrsi, dl, MVT::i32 , C4, DAG);
1547
+ // v1 = vror(v0,r0)
1548
+ SDValue Vror2 = getInstr (Hexagon::V6_vror, dl, VecTy, {Vor3, InstrC4}, DAG);
1549
+ // v0 = vor(v0,v1)
1550
+ SDValue Vor4 = getInstr (Hexagon::V6_vor, dl, VecTy, {Vor3, Vror2}, DAG);
1551
+ // The output is v.w[8]:v.w[0]
1552
+ // r3 = #0
1553
+ SDValue C0 = DAG.getTargetConstant (0 , dl, MVT::i32 );
1554
+ SDValue InstrC0 = getInstr (Hexagon::A2_tfrsi, dl, MVT::i32 , C0, DAG);
1555
+ // r0 = vextract(v0,r3)
1556
+ SDValue Res =
1557
+ getInstr (Hexagon::V6_extractw, dl, MVT::i32 , {Vor4, InstrC0}, DAG);
1558
+ if (VT == MVT::i64 ) {
1559
+ // r3 = #32
1560
+ SDValue C32 = DAG.getTargetConstant (32 , dl, MVT::i32 );
1561
+ SDValue InstrC32 = getInstr (Hexagon::A2_tfrsi, dl, MVT::i32 , C32, DAG);
1562
+ // r1 = vextract(v0,r3)
1563
+ SDValue Vextract =
1564
+ getInstr (Hexagon::V6_extractw, dl, MVT::i32 , {Vor4, InstrC32}, DAG);
1565
+ Res = getInstr (Hexagon::A2_combinew, dl, MVT::i64 , {Vextract, Res}, DAG);
1566
+ }
1567
+ return Res;
1568
+ }
1569
+
1440
1570
SDValue
1441
1571
HexagonTargetLowering::LowerHvxExtend (SDValue Op, SelectionDAG &DAG) const {
1442
1572
// Sign- and zero-extends are legal.
@@ -1595,7 +1725,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
1595
1725
case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement (Op, DAG);
1596
1726
case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector (Op, DAG);
1597
1727
case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement (Op, DAG);
1598
-
1728
+ case ISD::BITCAST: return LowerHvxBitcast (Op, DAG);
1599
1729
case ISD::ANY_EXTEND: return LowerHvxAnyExt (Op, DAG);
1600
1730
case ISD::SIGN_EXTEND: return LowerHvxSignExt (Op, DAG);
1601
1731
case ISD::ZERO_EXTEND: return LowerHvxZeroExt (Op, DAG);
0 commit comments