@@ -1744,45 +1744,60 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1744
1744
1745
1745
void AArch64DAGToDAGISel::SelectSMELdrStrZA (SDNode *N, bool IsLoad) {
1746
1746
// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
1747
- // If the vector select parameter is an immediate in the range 0-15 then we
1748
- // can emit it directly into the instruction as it's a legal operand.
1749
- // Otherwise we must emit 0 as the vector select operand and modify the base
1750
- // register instead.
1747
+ // If the vector number is an immediate between 0 and 15 inclusive then we can
1748
+ // put that directly into the immediate field of the instruction. If it's
1749
+ // outside of that range then we modify the base and slice by the greatest
1750
+ // multiple of 15 smaller than that number and put the remainder in the
1751
+ // instruction field. If it's not an immediate then we modify the base and
1752
+ // slice registers by that number and put 0 in the instruction.
1751
1753
SDLoc DL (N);
1752
1754
1753
- SDValue VecNum = N->getOperand (4 ), Base = N->getOperand (3 ),
1754
- TileSlice = N->getOperand (2 );
1755
- int Imm = -1 ;
1756
- if (auto ImmNode = dyn_cast<ConstantSDNode>(VecNum))
1757
- Imm = ImmNode->getZExtValue ();
1755
+ SDValue TileSlice = N->getOperand (2 );
1756
+ SDValue Base = N->getOperand (3 );
1757
+ SDValue VecNum = N->getOperand (4 );
1758
+ SDValue Remainder = CurDAG->getTargetConstant (0 , DL, MVT::i32 );
1759
+
1760
+ // true if the base and slice registers need to be modified
1761
+ bool NeedsAdd = true ;
1762
+ if (auto ImmNode = dyn_cast<ConstantSDNode>(VecNum)) {
1763
+ int Imm = ImmNode->getSExtValue ();
1764
+ if (Imm >= 0 && Imm <= 15 ) {
1765
+ Remainder = CurDAG->getTargetConstant (Imm, DL, MVT::i32 );
1766
+ NeedsAdd = false ;
1767
+ } else {
1768
+ Remainder = CurDAG->getTargetConstant (Imm % 15 , DL, MVT::i32 );
1769
+ NeedsAdd = true ;
1770
+ VecNum =
1771
+ SDValue (CurDAG->getMachineNode (AArch64::MOVi32imm, DL, MVT::i32 ,
1772
+ CurDAG->getTargetConstant (
1773
+ Imm - (Imm % 15 ), DL, MVT::i32 )),
1774
+ 0 );
1775
+ }
1776
+ }
1758
1777
1759
- if (Imm >= 0 && Imm <= 15 ) {
1760
- // 0-15 is a legal immediate so just pass it directly as a TargetConstant
1761
- VecNum = CurDAG->getTargetConstant (Imm, DL, MVT::i32 );
1762
- } else {
1778
+ if (NeedsAdd) {
1763
1779
// Get the vector length that will be multiplied by vnum
1764
1780
auto SVL = SDValue (
1765
1781
CurDAG->getMachineNode (AArch64::RDSVLI_XI, DL, MVT::i64 ,
1766
1782
CurDAG->getTargetConstant (1 , DL, MVT::i32 )),
1767
1783
0 );
1768
1784
1769
- // Multiply SVL and vnum then add it to the base register
1770
- if (VecNum.getValueType () == MVT::i32 )
1771
- VecNum = Widen (CurDAG, VecNum);
1772
- SDValue AddOps[] = {SVL, VecNum, Base};
1773
- auto Add = SDValue (
1774
- CurDAG->getMachineNode (AArch64::MADDXrrr, DL, MVT::i64 , AddOps), 0 );
1775
-
1776
- // The base register has been modified to take vnum into account so just
1777
- // pass 0
1778
- VecNum = CurDAG->getTargetConstant (0 , DL, MVT::i32 );
1779
- Base = Add;
1785
+ // Multiply SVL and vnum then add it to the base
1786
+ // Just add vnum to the tileslice
1787
+ SDValue BaseAddOps[] = {
1788
+ SVL, VecNum.getValueType () == MVT::i32 ? Widen (CurDAG, VecNum) : VecNum,
1789
+ Base};
1790
+ SDValue SliceAddOps[] = {TileSlice, VecNum};
1791
+ Base = SDValue (
1792
+ CurDAG->getMachineNode (AArch64::MADDXrrr, DL, MVT::i64 , BaseAddOps), 0 );
1793
+ TileSlice = SDValue (
1794
+ CurDAG->getMachineNode (AArch64::ADDWrr, DL, MVT::i32 , SliceAddOps), 0 );
1780
1795
}
1781
1796
1782
- SmallVector<SDValue, 6 > Ops = {TileSlice, VecNum , Base};
1797
+ SmallVector<SDValue, 6 > Ops = {TileSlice, Remainder , Base};
1783
1798
if (!IsLoad) {
1784
1799
Ops.insert (Ops.begin (), CurDAG->getRegister (AArch64::ZA, MVT::Other));
1785
- Ops.push_back (VecNum );
1800
+ Ops.push_back (Remainder );
1786
1801
}
1787
1802
auto LdrStr =
1788
1803
CurDAG->getMachineNode (IsLoad ? AArch64::LDR_ZA_PSEUDO : AArch64::STR_ZA,
0 commit comments