Skip to content

Commit 9e23dfe

Browse files
[ARM][NEON] Add constraint to vld2 Odd/Even Pseudo instructions.
This ensures that the odd and even pseudo instructions are allocated to the same register range, which fixes #71763. (#71763)
1 parent 14a027b commit 9e23dfe

File tree

4 files changed

+30
-24
lines changed

4 files changed

+30
-24
lines changed

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -640,12 +640,9 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
640640
// has an extra operand that is a use of the super-register. Record the
641641
// operand index and skip over it.
642642
unsigned SrcOpIdx = 0;
643-
if (!IsVLD2DUP) {
644-
if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
645-
RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
646-
RegSpc == SingleHighTSpc)
647-
SrcOpIdx = OpIdx++;
648-
}
643+
if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc || RegSpc == SingleLowSpc ||
644+
RegSpc == SingleHighQSpc || RegSpc == SingleHighTSpc)
645+
SrcOpIdx = OpIdx++;
649646

650647
// Copy the predicate operands.
651648
MIB.add(MI.getOperand(OpIdx++));

llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3032,11 +3032,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
30323032
}
30333033
if (is64BitVector || NumVecs == 1) {
30343034
// Double registers and VLD1 quad registers are directly supported.
3035-
} else if (NumVecs == 2) {
3036-
const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
3037-
SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3038-
MVT::Other, OpsA);
3039-
Chain = SDValue(VLdA, 1);
30403035
} else {
30413036
SDValue ImplDef = SDValue(
30423037
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);

llvm/lib/Target/ARM/ARMInstrNEON.td

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,12 +1491,26 @@ def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
14911491
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
14921492
addrmode6dupalign64>;
14931493

1494-
def VLD2DUPq8EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1495-
def VLD2DUPq8OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1496-
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1497-
def VLD2DUPq16OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1498-
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1499-
def VLD2DUPq32OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1494+
// Duplicate of VLDQQPseudo, but with an extra $src operand tied to $dst
1495+
// ("$src = $dst") to ensure the odd and even lanes use the same register range
1496+
class VLDQQPseudoConstrained<InstrItinClass itin>
1497+
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr, QQPR: $src), itin,
1498+
"$src = $dst">;
1499+
class VLDQQWBPseudoConstrained<InstrItinClass itin>
1500+
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
1501+
(ins addrmode6:$addr, am6offset:$offset, QQPR: $src), itin,
1502+
"$addr.addr = $wb, $src = $dst">;
1503+
class VLDQQWBfixedPseudoConstrained<InstrItinClass itin>
1504+
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
1505+
(ins addrmode6:$addr, QQPR: $src), itin,
1506+
"$addr.addr = $wb, $src = $dst">;
1507+
1508+
def VLD2DUPq8EvenPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1509+
def VLD2DUPq8OddPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1510+
def VLD2DUPq16EvenPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1511+
def VLD2DUPq16OddPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1512+
def VLD2DUPq32EvenPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1513+
def VLD2DUPq32OddPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
15001514

15011515
// ...with address register writeback:
15021516
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -1534,12 +1548,12 @@ defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
15341548
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
15351549
addrmode6dupalign64>;
15361550

1537-
def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1538-
def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1539-
def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1540-
def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1541-
def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1542-
def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1551+
def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1552+
def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1553+
def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1554+
def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1555+
def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1556+
def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
15431557

15441558
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
15451559
class VLD3DUP<bits<4> op7_4, string Dt>

llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ entry:
488488
define arm_aapcs_vfpcc [2 x <4 x i32>] @test_vld2q_dup_bf16(ptr %ptr) {
489489
; CHECK-LABEL: test_vld2q_dup_bf16:
490490
; CHECK: @ %bb.0: @ %entry
491-
; CHECK-NEXT: vld2.16 {d16[], d18[]}, [r0]
491+
; CHECK-NEXT: vld2.16 {d0[], d2[]}, [r0]
492492
; CHECK-NEXT: vld2.16 {d1[], d3[]}, [r0]
493493
; CHECK-NEXT: bx lr
494494
entry:

0 commit comments

Comments
 (0)