@@ -1491,12 +1491,26 @@ def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1491
1491
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1492
1492
addrmode6dupalign64>;
1493
1493
1494
- def VLD2DUPq8EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1495
- def VLD2DUPq8OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1496
- def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1497
- def VLD2DUPq16OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1498
- def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1499
- def VLD2DUPq32OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1494
+ // Duplicate of VLDQQPseudo, but with an extra QQPR input ($src) that is tied
1495
+ // to the QQPR output ($dst) by the "$src = $dst" constraint, to ensure the
+ // odd and even lanes use the same register range.
1496
+ class VLDQQPseudoInputDST<InstrItinClass itin>
1497
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr, QQPR: $src), itin,
1498
+ "$src = $dst">;
1499
+ // Writeback pseudo with a QQPR input ($src) in addition to the address
+ // ($addr) and am6offset ($offset) operands. It produces a QQPR result ($dst)
+ // and a GPR writeback result ($wb). The constraint string ties $addr.addr to
+ // $wb and $src to $dst.
+ class VLDQQWBPseudoInputDST<InstrItinClass itin>
1500
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
1501
+ (ins addrmode6:$addr, am6offset:$offset, QQPR: $src), itin,
1502
+ "$addr.addr = $wb, $src = $dst">;
1503
+ // Writeback pseudo with a QQPR input ($src) and no $offset operand: the only
+ // inputs are the address ($addr) and $src. It produces a QQPR result ($dst)
+ // and a GPR writeback result ($wb). The constraint string ties $addr.addr to
+ // $wb and $src to $dst.
+ class VLDQQWBfixedPseudoInputDST<InstrItinClass itin>
1504
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
1505
+ (ins addrmode6:$addr, QQPR: $src), itin,
1506
+ "$addr.addr = $wb, $src = $dst">;
1507
+
1508
+ // Even/Odd VLD2DUPq pseudos for the 8, 16 and 32 variants. All of them use
+ // VLDQQPseudoInputDST (IIC_VLD2dup itinerary, WriteVLD2 scheduling), so each
+ // takes a QQPR $src input that is constrained to equal its $dst output.
+ def VLD2DUPq8EvenPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1509
+ def VLD2DUPq8OddPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1510
+ def VLD2DUPq16EvenPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1511
+ def VLD2DUPq16OddPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1512
+ def VLD2DUPq32EvenPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1513
+ def VLD2DUPq32OddPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1500
1514
1501
1515
// ...with address register writeback:
1502
1516
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -1534,12 +1548,12 @@ defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1534
1548
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1535
1549
addrmode6dupalign64>;
1536
1550
1537
- def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudo <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1538
- def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudo <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1539
- def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudo <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1540
- def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudo <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1541
- def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudo <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1542
- def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudo <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1551
+ // Writeback Odd VLD2DUPq pseudos for the 8, 16 and 32 variants. The _fixed
+ // defs use VLDQQWBfixedPseudoInputDST (no $offset operand) and the _register
+ // defs use VLDQQWBPseudoInputDST (with an am6offset $offset operand). Both
+ // classes constrain the QQPR $src input to equal the $dst output.
+ def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudoInputDST <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1552
+ def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudoInputDST <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1553
+ def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudoInputDST <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1554
+ def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudoInputDST <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1555
+ def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudoInputDST <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1556
+ def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudoInputDST <IIC_VLD2dup>, Sched<[WriteVLD2]>;
1543
1557
1544
1558
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
1545
1559
class VLD3DUP<bits<4> op7_4, string Dt>
0 commit comments