Skip to content

Commit 44843e2

Browse files
committed
[ARM][NEON] Combine base address updates for vld1x intrinsics
Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D102855
1 parent de7729d commit 44843e2

File tree

7 files changed

+511
-116
lines changed

7 files changed

+511
-116
lines changed

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,31 +190,58 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
190190
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
191191

192192
{ ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false},
193+
{ ARM::VLD1d16QPseudoWB_fixed, ARM::VLD1d16Qwb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
194+
{ ARM::VLD1d16QPseudoWB_register, ARM::VLD1d16Qwb_register, true, true, true, SingleSpc, 4, 4 ,false},
193195
{ ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false},
196+
{ ARM::VLD1d16TPseudoWB_fixed, ARM::VLD1d16Twb_fixed, true, true, false, SingleSpc, 3, 4 ,false},
197+
{ ARM::VLD1d16TPseudoWB_register, ARM::VLD1d16Twb_register, true, true, true, SingleSpc, 3, 4 ,false},
198+
194199
{ ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false},
200+
{ ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d32Qwb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
201+
{ ARM::VLD1d32QPseudoWB_register, ARM::VLD1d32Qwb_register, true, true, true, SingleSpc, 4, 2 ,false},
195202
{ ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false},
203+
{ ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d32Twb_fixed, true, true, false, SingleSpc, 3, 2 ,false},
204+
{ ARM::VLD1d32TPseudoWB_register, ARM::VLD1d32Twb_register, true, true, true, SingleSpc, 3, 2 ,false},
205+
196206
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
197207
{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
198208
{ ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false},
199209
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
200210
{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
201211
{ ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false},
212+
202213
{ ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false},
214+
{ ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d8Qwb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
215+
{ ARM::VLD1d8QPseudoWB_register, ARM::VLD1d8Qwb_register, true, true, true, SingleSpc, 4, 8 ,false},
203216
{ ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false},
217+
{ ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d8Twb_fixed, true, true, false, SingleSpc, 3, 8 ,false},
218+
{ ARM::VLD1d8TPseudoWB_register, ARM::VLD1d8Twb_register, true, true, true, SingleSpc, 3, 8 ,false},
219+
204220
{ ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false},
221+
{ ARM::VLD1q16HighQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleHighQSpc, 4, 4 ,false},
205222
{ ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false},
223+
{ ARM::VLD1q16HighTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleHighTSpc, 3, 4 ,false},
206224
{ ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false},
207225
{ ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false},
226+
208227
{ ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false},
228+
{ ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleHighQSpc, 4, 2 ,false},
209229
{ ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false},
230+
{ ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleHighTSpc, 3, 2 ,false},
210231
{ ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false},
211232
{ ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false},
233+
212234
{ ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false},
235+
{ ARM::VLD1q64HighQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleHighQSpc, 4, 1 ,false},
213236
{ ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false},
237+
{ ARM::VLD1q64HighTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleHighTSpc, 3, 1 ,false},
214238
{ ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false},
215239
{ ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1 ,false},
240+
216241
{ ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, false, false, SingleHighQSpc, 4, 8 ,false},
242+
{ ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleHighQSpc, 4, 8 ,false},
217243
{ ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false},
244+
{ ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleHighTSpc, 3, 8 ,false},
218245
{ ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false},
219246
{ ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false},
220247

@@ -2578,8 +2605,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
25782605
case ARM::VLD3d16Pseudo:
25792606
case ARM::VLD3d32Pseudo:
25802607
case ARM::VLD1d8TPseudo:
2608+
case ARM::VLD1d8TPseudoWB_fixed:
2609+
case ARM::VLD1d8TPseudoWB_register:
25812610
case ARM::VLD1d16TPseudo:
2611+
case ARM::VLD1d16TPseudoWB_fixed:
2612+
case ARM::VLD1d16TPseudoWB_register:
25822613
case ARM::VLD1d32TPseudo:
2614+
case ARM::VLD1d32TPseudoWB_fixed:
2615+
case ARM::VLD1d32TPseudoWB_register:
25832616
case ARM::VLD1d64TPseudo:
25842617
case ARM::VLD1d64TPseudoWB_fixed:
25852618
case ARM::VLD1d64TPseudoWB_register:
@@ -2599,26 +2632,40 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
25992632
case ARM::VLD4d16Pseudo:
26002633
case ARM::VLD4d32Pseudo:
26012634
case ARM::VLD1d8QPseudo:
2635+
case ARM::VLD1d8QPseudoWB_fixed:
2636+
case ARM::VLD1d8QPseudoWB_register:
26022637
case ARM::VLD1d16QPseudo:
2638+
case ARM::VLD1d16QPseudoWB_fixed:
2639+
case ARM::VLD1d16QPseudoWB_register:
26032640
case ARM::VLD1d32QPseudo:
2641+
case ARM::VLD1d32QPseudoWB_fixed:
2642+
case ARM::VLD1d32QPseudoWB_register:
26042643
case ARM::VLD1d64QPseudo:
26052644
case ARM::VLD1d64QPseudoWB_fixed:
26062645
case ARM::VLD1d64QPseudoWB_register:
26072646
case ARM::VLD1q8HighQPseudo:
2647+
case ARM::VLD1q8HighQPseudo_UPD:
26082648
case ARM::VLD1q8LowQPseudo_UPD:
26092649
case ARM::VLD1q8HighTPseudo:
2650+
case ARM::VLD1q8HighTPseudo_UPD:
26102651
case ARM::VLD1q8LowTPseudo_UPD:
26112652
case ARM::VLD1q16HighQPseudo:
2653+
case ARM::VLD1q16HighQPseudo_UPD:
26122654
case ARM::VLD1q16LowQPseudo_UPD:
26132655
case ARM::VLD1q16HighTPseudo:
2656+
case ARM::VLD1q16HighTPseudo_UPD:
26142657
case ARM::VLD1q16LowTPseudo_UPD:
26152658
case ARM::VLD1q32HighQPseudo:
2659+
case ARM::VLD1q32HighQPseudo_UPD:
26162660
case ARM::VLD1q32LowQPseudo_UPD:
26172661
case ARM::VLD1q32HighTPseudo:
2662+
case ARM::VLD1q32HighTPseudo_UPD:
26182663
case ARM::VLD1q32LowTPseudo_UPD:
26192664
case ARM::VLD1q64HighQPseudo:
2665+
case ARM::VLD1q64HighQPseudo_UPD:
26202666
case ARM::VLD1q64LowQPseudo_UPD:
26212667
case ARM::VLD1q64HighTPseudo:
2668+
case ARM::VLD1q64HighTPseudo_UPD:
26222669
case ARM::VLD1q64LowTPseudo_UPD:
26232670
case ARM::VLD4d8Pseudo_UPD:
26242671
case ARM::VLD4d16Pseudo_UPD:

llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1945,7 +1945,13 @@ static bool isVLDfixed(unsigned Opc)
19451945
case ARM::VLD1d64Qwb_fixed : return true;
19461946
case ARM::VLD1d32wb_fixed : return true;
19471947
case ARM::VLD1d64wb_fixed : return true;
1948+
case ARM::VLD1d8TPseudoWB_fixed : return true;
1949+
case ARM::VLD1d16TPseudoWB_fixed : return true;
1950+
case ARM::VLD1d32TPseudoWB_fixed : return true;
19481951
case ARM::VLD1d64TPseudoWB_fixed : return true;
1952+
case ARM::VLD1d8QPseudoWB_fixed : return true;
1953+
case ARM::VLD1d16QPseudoWB_fixed : return true;
1954+
case ARM::VLD1d32QPseudoWB_fixed : return true;
19491955
case ARM::VLD1d64QPseudoWB_fixed : return true;
19501956
case ARM::VLD1q8wb_fixed : return true;
19511957
case ARM::VLD1q16wb_fixed : return true;
@@ -2015,7 +2021,13 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
20152021
case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
20162022
case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
20172023
case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2024+
case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2025+
case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2026+
case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
20182027
case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2028+
case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2029+
case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2030+
case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
20192031
case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
20202032
case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
20212033
case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
@@ -4281,6 +4293,54 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
42814293
return;
42824294
}
42834295

4296+
case ARMISD::VLD1x2_UPD: {
4297+
if (Subtarget->hasNEON()) {
4298+
static const uint16_t DOpcodes[] = {
4299+
ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4300+
ARM::VLD1q64wb_fixed};
4301+
static const uint16_t QOpcodes[] = {
4302+
ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4303+
ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4304+
SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4305+
return;
4306+
}
4307+
break;
4308+
}
4309+
4310+
case ARMISD::VLD1x3_UPD: {
4311+
if (Subtarget->hasNEON()) {
4312+
static const uint16_t DOpcodes[] = {
4313+
ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4314+
ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4315+
static const uint16_t QOpcodes0[] = {
4316+
ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4317+
ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4318+
static const uint16_t QOpcodes1[] = {
4319+
ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4320+
ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4321+
SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4322+
return;
4323+
}
4324+
break;
4325+
}
4326+
4327+
case ARMISD::VLD1x4_UPD: {
4328+
if (Subtarget->hasNEON()) {
4329+
static const uint16_t DOpcodes[] = {
4330+
ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4331+
ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4332+
static const uint16_t QOpcodes0[] = {
4333+
ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4334+
ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4335+
static const uint16_t QOpcodes1[] = {
4336+
ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4337+
ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4338+
SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4339+
return;
4340+
}
4341+
break;
4342+
}
4343+
42844344
case ARMISD::VLD2LN_UPD: {
42854345
static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
42864346
ARM::VLD2LNd16Pseudo_UPD,

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1781,6 +1781,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
17811781
MAKE_CASE(ARMISD::VLD2_UPD)
17821782
MAKE_CASE(ARMISD::VLD3_UPD)
17831783
MAKE_CASE(ARMISD::VLD4_UPD)
1784+
MAKE_CASE(ARMISD::VLD1x2_UPD)
1785+
MAKE_CASE(ARMISD::VLD1x3_UPD)
1786+
MAKE_CASE(ARMISD::VLD1x4_UPD)
17841787
MAKE_CASE(ARMISD::VLD2LN_UPD)
17851788
MAKE_CASE(ARMISD::VLD3LN_UPD)
17861789
MAKE_CASE(ARMISD::VLD4LN_UPD)
@@ -14625,7 +14628,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
1462514628
// Find the new opcode for the updating load/store.
1462614629
bool isLoadOp = true;
1462714630
bool isLaneOp = false;
14628-
// Workaround for vst1x and vld1x which do not have alignment operand.
14631+
// Workaround for vst1x and vld1x intrinsics which do not have alignment
14632+
// as an operand.
1462914633
bool hasAlignment = true;
1463014634
unsigned NewOpc = 0;
1463114635
unsigned NumVecs = 0;
@@ -14641,13 +14645,16 @@ static SDValue CombineBaseUpdate(SDNode *N,
1464114645
NumVecs = 3; break;
1464214646
case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
1464314647
NumVecs = 4; break;
14644-
case Intrinsic::arm_neon_vld1x2:
14645-
case Intrinsic::arm_neon_vld1x3:
14646-
case Intrinsic::arm_neon_vld1x4:
14648+
case Intrinsic::arm_neon_vld1x2: NewOpc = ARMISD::VLD1x2_UPD;
14649+
NumVecs = 2; hasAlignment = false; break;
14650+
case Intrinsic::arm_neon_vld1x3: NewOpc = ARMISD::VLD1x3_UPD;
14651+
NumVecs = 3; hasAlignment = false; break;
14652+
case Intrinsic::arm_neon_vld1x4: NewOpc = ARMISD::VLD1x4_UPD;
14653+
NumVecs = 4; hasAlignment = false; break;
1464714654
case Intrinsic::arm_neon_vld2dup:
1464814655
case Intrinsic::arm_neon_vld3dup:
1464914656
case Intrinsic::arm_neon_vld4dup:
14650-
// TODO: Support updating VLD1x and VLDxDUP nodes. For now, we just skip
14657+
// TODO: Support updating VLDxDUP nodes. For now, we just skip
1465114658
// combining base updates for such intrinsics.
1465214659
continue;
1465314660
case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,9 @@ class VectorType;
330330
VLD2DUP_UPD,
331331
VLD3DUP_UPD,
332332
VLD4DUP_UPD,
333+
VLD1x2_UPD,
334+
VLD1x3_UPD,
335+
VLD1x4_UPD,
333336

334337
// NEON stores with post-increment base updates:
335338
VST1_UPD,

llvm/lib/Target/ARM/ARMInstrNEON.td

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -705,21 +705,31 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
705705
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
706706
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
707707

708-
def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
709-
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
710-
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
711-
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
712-
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
708+
def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
709+
def VLD1d8TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
710+
def VLD1d8TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
711+
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
712+
def VLD1d16TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
713+
def VLD1d16TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
714+
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
715+
def VLD1d32TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
716+
def VLD1d32TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
717+
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
718+
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
713719
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
714720

715-
def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
716-
def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
717-
def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
718-
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
719-
def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
720-
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
721-
def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
722-
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
721+
def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
722+
def VLD1q8HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
723+
def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
724+
def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
725+
def VLD1q16HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
726+
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
727+
def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
728+
def VLD1q32HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
729+
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
730+
def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
731+
def VLD1q64HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
732+
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
723733

724734
// ...with 4 registers
725735
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
@@ -758,21 +768,31 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
758768
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
759769
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
760770

761-
def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
762-
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
763-
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
764-
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
765-
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
771+
def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
772+
def VLD1d8QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
773+
def VLD1d8QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
774+
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
775+
def VLD1d16QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
776+
def VLD1d16QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
777+
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
778+
def VLD1d32QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
779+
def VLD1d32QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
780+
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
781+
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
766782
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
767783

768-
def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
769-
def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
770-
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
771-
def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
772-
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
773-
def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
774-
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
775-
def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
784+
def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
785+
def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
786+
def VLD1q8HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
787+
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
788+
def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
789+
def VLD1q16HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
790+
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
791+
def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
792+
def VLD1q32HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
793+
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
794+
def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
795+
def VLD1q64HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
776796

777797
// VLD2 : Vector Load (multiple 2-element structures)
778798
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,

0 commit comments

Comments
 (0)