Skip to content

Commit e8efe7f

Browse files
committed
[AArch64][SME2][SVE2p1] Choose strided or contiguous loads
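When SME2 is available, select pseudo instructions for the multi-vector ld1/ldnt1 loads and expand each pseudo to the strided or contiguous form of the instruction, depending on which register tuple register allocation assigned to the destination. Differential Revision: https://reviews.llvm.org/D156311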
1 parent e18a547 commit e8efe7f

File tree

7 files changed

+4786
-53
lines changed

7 files changed

+4786
-53
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
6666
bool expandMBB(MachineBasicBlock &MBB);
6767
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
6868
MachineBasicBlock::iterator &NextMBBI);
69+
bool expandMultiVecPseudo(MachineBasicBlock &MBB,
70+
MachineBasicBlock::iterator MBBI,
71+
TargetRegisterClass ContiguousClass,
72+
TargetRegisterClass StridedClass,
73+
unsigned ContiguousOpc, unsigned StridedOpc);
6974
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
7075
unsigned BitSize);
7176

@@ -1038,6 +1043,35 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
10381043
return EndBB;
10391044
}
10401045

1046+
/// Expand a multi-vector pseudo into one of two real instructions, chosen by
/// the register class that contains the pseudo's destination tuple register.
///
/// \param MBB             Basic block the new instruction is inserted into.
/// \param MBBI            Iterator to the pseudo; the pseudo is erased.
/// \param ContiguousClass Register class selecting \p ContiguousOpc.
/// \param StridedClass    Register class selecting \p StridedOpc.
/// \param ContiguousOpc   Opcode used when operand 0 is in \p ContiguousClass.
/// \param StridedOpc      Opcode used when operand 0 is in \p StridedClass
///                        (and not in \p ContiguousClass).
/// \returns true; it is unreachable for operand 0 to be in neither class.
bool AArch64ExpandPseudo::expandMultiVecPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
    unsigned ContiguousOpc, unsigned StridedOpc) {
  MachineInstr &MI = *MBBI;
  const Register Tuple = MI.getOperand(0).getReg();

  // Ask the register class directly instead of scanning its register list.
  // The contiguous class is tested first, as before, so it wins if a register
  // were ever a member of both classes.
  unsigned Opc;
  if (ContiguousClass.contains(Tuple))
    Opc = ContiguousOpc;
  else if (StridedClass.contains(Tuple))
    Opc = StridedOpc;
  else
    llvm_unreachable("Cannot expand Multi-Vector pseudo");

  // The replacement takes the pseudo's first four operands unchanged.
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
                                .add(MI.getOperand(0))
                                .add(MI.getOperand(1))
                                .add(MI.getOperand(2))
                                .add(MI.getOperand(3));
  // Only one instruction is emitted, so the same builder is passed for both
  // builder arguments of transferImpOps.
  transferImpOps(MI, MIB, MIB);
  MI.eraseFromParent();
  return true;
}
1074+
10411075
/// If MBBI references a pseudo instruction that should be expanded here,
10421076
/// do the expansion and return true. Otherwise return false.
10431077
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -1492,6 +1526,134 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
14921526
MI.eraseFromParent();
14931527
return true;
14941528
}
1529+
case AArch64::LD1B_2Z_IMM_PSEUDO:
1530+
return expandMultiVecPseudo(
1531+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1532+
AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
1533+
case AArch64::LD1H_2Z_IMM_PSEUDO:
1534+
return expandMultiVecPseudo(
1535+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1536+
AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
1537+
case AArch64::LD1W_2Z_IMM_PSEUDO:
1538+
return expandMultiVecPseudo(
1539+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1540+
AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
1541+
case AArch64::LD1D_2Z_IMM_PSEUDO:
1542+
return expandMultiVecPseudo(
1543+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1544+
AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
1545+
case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1546+
return expandMultiVecPseudo(
1547+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1548+
AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
1549+
case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1550+
return expandMultiVecPseudo(
1551+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1552+
AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
1553+
case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1554+
return expandMultiVecPseudo(
1555+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1556+
AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
1557+
case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1558+
return expandMultiVecPseudo(
1559+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1560+
AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
1561+
case AArch64::LD1B_2Z_PSEUDO:
1562+
return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1563+
AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
1564+
AArch64::LD1B_2Z_STRIDED);
1565+
case AArch64::LD1H_2Z_PSEUDO:
1566+
return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1567+
AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
1568+
AArch64::LD1H_2Z_STRIDED);
1569+
case AArch64::LD1W_2Z_PSEUDO:
1570+
return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1571+
AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
1572+
AArch64::LD1W_2Z_STRIDED);
1573+
case AArch64::LD1D_2Z_PSEUDO:
1574+
return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1575+
AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
1576+
AArch64::LD1D_2Z_STRIDED);
1577+
case AArch64::LDNT1B_2Z_PSEUDO:
1578+
return expandMultiVecPseudo(
1579+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1580+
AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
1581+
case AArch64::LDNT1H_2Z_PSEUDO:
1582+
return expandMultiVecPseudo(
1583+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1584+
AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
1585+
case AArch64::LDNT1W_2Z_PSEUDO:
1586+
return expandMultiVecPseudo(
1587+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1588+
AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
1589+
case AArch64::LDNT1D_2Z_PSEUDO:
1590+
return expandMultiVecPseudo(
1591+
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1592+
AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
1593+
case AArch64::LD1B_4Z_IMM_PSEUDO:
1594+
return expandMultiVecPseudo(
1595+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1596+
AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
1597+
case AArch64::LD1H_4Z_IMM_PSEUDO:
1598+
return expandMultiVecPseudo(
1599+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1600+
AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
1601+
case AArch64::LD1W_4Z_IMM_PSEUDO:
1602+
return expandMultiVecPseudo(
1603+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1604+
AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
1605+
case AArch64::LD1D_4Z_IMM_PSEUDO:
1606+
return expandMultiVecPseudo(
1607+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1608+
AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
1609+
case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1610+
return expandMultiVecPseudo(
1611+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1612+
AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
1613+
case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1614+
return expandMultiVecPseudo(
1615+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1616+
AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
1617+
case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1618+
return expandMultiVecPseudo(
1619+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1620+
AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
1621+
case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1622+
return expandMultiVecPseudo(
1623+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1624+
AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
1625+
case AArch64::LD1B_4Z_PSEUDO:
1626+
return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1627+
AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
1628+
AArch64::LD1B_4Z_STRIDED);
1629+
case AArch64::LD1H_4Z_PSEUDO:
1630+
return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1631+
AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
1632+
AArch64::LD1H_4Z_STRIDED);
1633+
case AArch64::LD1W_4Z_PSEUDO:
1634+
return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1635+
AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
1636+
AArch64::LD1W_4Z_STRIDED);
1637+
case AArch64::LD1D_4Z_PSEUDO:
1638+
return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1639+
AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
1640+
AArch64::LD1D_4Z_STRIDED);
1641+
case AArch64::LDNT1B_4Z_PSEUDO:
1642+
return expandMultiVecPseudo(
1643+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1644+
AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
1645+
case AArch64::LDNT1H_4Z_PSEUDO:
1646+
return expandMultiVecPseudo(
1647+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1648+
AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
1649+
case AArch64::LDNT1W_4Z_PSEUDO:
1650+
return expandMultiVecPseudo(
1651+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1652+
AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
1653+
case AArch64::LDNT1D_4Z_PSEUDO:
1654+
return expandMultiVecPseudo(
1655+
MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1656+
AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
14951657
}
14961658
return false;
14971659
}

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 136 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4660,68 +4660,188 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
46604660
}
46614661
case Intrinsic::aarch64_sve_ld1_pn_x2: {
46624662
if (VT == MVT::nxv16i8) {
4663-
SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z);
4663+
if (Subtarget->hasSME2())
4664+
SelectContiguousMultiVectorLoad(
4665+
Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
4666+
else if (Subtarget->hasSVE2p1())
4667+
SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
4668+
AArch64::LD1B_2Z);
4669+
else
4670+
break;
46644671
return;
46654672
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
46664673
VT == MVT::nxv8bf16) {
4667-
SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z);
4674+
if (Subtarget->hasSME2())
4675+
SelectContiguousMultiVectorLoad(
4676+
Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
4677+
else if (Subtarget->hasSVE2p1())
4678+
SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
4679+
AArch64::LD1H_2Z);
4680+
else
4681+
break;
46684682
return;
46694683
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4670-
SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z);
4684+
if (Subtarget->hasSME2())
4685+
SelectContiguousMultiVectorLoad(
4686+
Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
4687+
else if (Subtarget->hasSVE2p1())
4688+
SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
4689+
AArch64::LD1W_2Z);
4690+
else
4691+
break;
46714692
return;
46724693
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4673-
SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z);
4694+
if (Subtarget->hasSME2())
4695+
SelectContiguousMultiVectorLoad(
4696+
Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
4697+
else if (Subtarget->hasSVE2p1())
4698+
SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
4699+
AArch64::LD1D_2Z);
4700+
else
4701+
break;
46744702
return;
46754703
}
46764704
break;
46774705
}
46784706
case Intrinsic::aarch64_sve_ld1_pn_x4: {
46794707
if (VT == MVT::nxv16i8) {
4680-
SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z);
4708+
if (Subtarget->hasSME2())
4709+
SelectContiguousMultiVectorLoad(
4710+
Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
4711+
else if (Subtarget->hasSVE2p1())
4712+
SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
4713+
AArch64::LD1B_4Z);
4714+
else
4715+
break;
46814716
return;
46824717
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
46834718
VT == MVT::nxv8bf16) {
4684-
SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z);
4719+
if (Subtarget->hasSME2())
4720+
SelectContiguousMultiVectorLoad(
4721+
Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
4722+
else if (Subtarget->hasSVE2p1())
4723+
SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
4724+
AArch64::LD1H_4Z);
4725+
else
4726+
break;
46854727
return;
46864728
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4687-
SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z);
4729+
if (Subtarget->hasSME2())
4730+
SelectContiguousMultiVectorLoad(
4731+
Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
4732+
else if (Subtarget->hasSVE2p1())
4733+
SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
4734+
AArch64::LD1W_4Z);
4735+
else
4736+
break;
46884737
return;
46894738
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4690-
SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z);
4739+
if (Subtarget->hasSME2())
4740+
SelectContiguousMultiVectorLoad(
4741+
Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
4742+
else if (Subtarget->hasSVE2p1())
4743+
SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
4744+
AArch64::LD1D_4Z);
4745+
else
4746+
break;
46914747
return;
46924748
}
46934749
break;
46944750
}
46954751
case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
46964752
if (VT == MVT::nxv16i8) {
4697-
SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z);
4753+
if (Subtarget->hasSME2())
4754+
SelectContiguousMultiVectorLoad(Node, 2, 0,
4755+
AArch64::LDNT1B_2Z_IMM_PSEUDO,
4756+
AArch64::LDNT1B_2Z_PSEUDO);
4757+
else if (Subtarget->hasSVE2p1())
4758+
SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
4759+
AArch64::LDNT1B_2Z);
4760+
else
4761+
break;
46984762
return;
46994763
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
47004764
VT == MVT::nxv8bf16) {
4701-
SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z);
4765+
if (Subtarget->hasSME2())
4766+
SelectContiguousMultiVectorLoad(Node, 2, 1,
4767+
AArch64::LDNT1H_2Z_IMM_PSEUDO,
4768+
AArch64::LDNT1H_2Z_PSEUDO);
4769+
else if (Subtarget->hasSVE2p1())
4770+
SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
4771+
AArch64::LDNT1H_2Z);
4772+
else
4773+
break;
47024774
return;
47034775
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4704-
SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z);
4776+
if (Subtarget->hasSME2())
4777+
SelectContiguousMultiVectorLoad(Node, 2, 2,
4778+
AArch64::LDNT1W_2Z_IMM_PSEUDO,
4779+
AArch64::LDNT1W_2Z_PSEUDO);
4780+
else if (Subtarget->hasSVE2p1())
4781+
SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
4782+
AArch64::LDNT1W_2Z);
4783+
else
4784+
break;
47054785
return;
47064786
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4707-
SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z);
4787+
if (Subtarget->hasSME2())
4788+
SelectContiguousMultiVectorLoad(Node, 2, 3,
4789+
AArch64::LDNT1D_2Z_IMM_PSEUDO,
4790+
AArch64::LDNT1D_2Z_PSEUDO);
4791+
else if (Subtarget->hasSVE2p1())
4792+
SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
4793+
AArch64::LDNT1D_2Z);
4794+
else
4795+
break;
47084796
return;
47094797
}
47104798
break;
47114799
}
47124800
case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
47134801
if (VT == MVT::nxv16i8) {
4714-
SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z);
4802+
if (Subtarget->hasSME2())
4803+
SelectContiguousMultiVectorLoad(Node, 4, 0,
4804+
AArch64::LDNT1B_4Z_IMM_PSEUDO,
4805+
AArch64::LDNT1B_4Z_PSEUDO);
4806+
else if (Subtarget->hasSVE2p1())
4807+
SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
4808+
AArch64::LDNT1B_4Z);
4809+
else
4810+
break;
47154811
return;
47164812
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
47174813
VT == MVT::nxv8bf16) {
4718-
SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z);
4814+
if (Subtarget->hasSME2())
4815+
SelectContiguousMultiVectorLoad(Node, 4, 1,
4816+
AArch64::LDNT1H_4Z_IMM_PSEUDO,
4817+
AArch64::LDNT1H_4Z_PSEUDO);
4818+
else if (Subtarget->hasSVE2p1())
4819+
SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
4820+
AArch64::LDNT1H_4Z);
4821+
else
4822+
break;
47194823
return;
47204824
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4721-
SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z);
4825+
if (Subtarget->hasSME2())
4826+
SelectContiguousMultiVectorLoad(Node, 4, 2,
4827+
AArch64::LDNT1W_4Z_IMM_PSEUDO,
4828+
AArch64::LDNT1W_4Z_PSEUDO);
4829+
else if (Subtarget->hasSVE2p1())
4830+
SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
4831+
AArch64::LDNT1W_4Z);
4832+
else
4833+
break;
47224834
return;
47234835
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4724-
SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z);
4836+
if (Subtarget->hasSME2())
4837+
SelectContiguousMultiVectorLoad(Node, 4, 3,
4838+
AArch64::LDNT1D_4Z_IMM_PSEUDO,
4839+
AArch64::LDNT1D_4Z_PSEUDO);
4840+
else if (Subtarget->hasSVE2p1())
4841+
SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
4842+
AArch64::LDNT1D_4Z);
4843+
else
4844+
break;
47254845
return;
47264846
}
47274847
break;

0 commit comments

Comments
 (0)