Skip to content

Commit 44479b8

Browse files
committed
[AArch64] Ensure constrained register class in INS peephole.
Constrain the register class of NewDef to that of OldDef before replacing OldDef with NewDef, in case the two register classes do not match. Fixes #63777.
1 parent dcfa2ab commit 44479b8

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -663,6 +663,7 @@ bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
663663
// Let's remove MIs for high 64-bits.
664664
Register OldDef = MI.getOperand(0).getReg();
665665
Register NewDef = MI.getOperand(1).getReg();
666+
MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
666667
MRI->replaceRegWith(OldDef, NewDef);
667668
MI.eraseFromParent();
668669

llvm/test/CodeGen/AArch64/peephole-insert-subreg.mir

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,16 @@
1010
ret i64 %shl
1111
}
1212

13+
define <8 x i16> @pr63777(<8 x i16> %vmull.i, ptr %iPtr) {
14+
entry:
15+
%ld = load <8 x i8>, ptr %iPtr, align 1
16+
%shuffle.i.i = shufflevector <8 x i8> %ld, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17+
%bc = bitcast <16 x i8> %shuffle.i.i to <8 x i16>
18+
%vecinit7.i = shufflevector <8 x i16> %bc, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
19+
%mul.i109 = mul <8 x i16> %vecinit7.i, %vmull.i
20+
%or.i = or <8 x i16> %mul.i109, %bc
21+
ret <8 x i16> %or.i
22+
}
1323
...
1424
---
1525
---
@@ -45,3 +55,60 @@ body: |
4555
%4:gpr64 = nuw nsw UBFMXri killed %2, 63, 31
4656
$x0 = COPY %4
4757
RET_ReallyLR implicit $x0
58+
59+
...
60+
---
61+
name: pr63777
62+
tracksRegLiveness: true
63+
registers:
64+
- { id: 0, class: fpr128, preferred-register: '' }
65+
- { id: 1, class: gpr64common, preferred-register: '' }
66+
- { id: 2, class: fpr64, preferred-register: '' }
67+
- { id: 3, class: fpr128, preferred-register: '' }
68+
- { id: 4, class: fpr64, preferred-register: '' }
69+
- { id: 5, class: fpr128, preferred-register: '' }
70+
- { id: 6, class: fpr128, preferred-register: '' }
71+
- { id: 7, class: fpr128, preferred-register: '' }
72+
- { id: 8, class: fpr128, preferred-register: '' }
73+
- { id: 9, class: fpr128_lo, preferred-register: '' }
74+
- { id: 10, class: fpr128, preferred-register: '' }
75+
- { id: 11, class: fpr128, preferred-register: '' }
76+
liveins:
77+
- { reg: '$q0', virtual-reg: '%0' }
78+
- { reg: '$x0', virtual-reg: '%1' }
79+
body: |
80+
bb.0.entry:
81+
liveins: $q0, $x0
82+
83+
; CHECK-LABEL: name: pr63777
84+
; CHECK: liveins: $q0, $x0
85+
; CHECK-NEXT: {{ $}}
86+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
87+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
88+
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (s64) from %ir.iPtr, align 1)
89+
; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
90+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub
91+
; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
92+
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[COPY2]], %subreg.dsub
93+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
94+
; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128_lo = INSERT_SUBREG [[DEF1]], killed [[LDRDui]], %subreg.dsub
95+
; CHECK-NEXT: [[MULv8i16_indexed:%[0-9]+]]:fpr128 = MULv8i16_indexed [[COPY1]], [[INSERT_SUBREG1]], 1
96+
; CHECK-NEXT: [[ORRv16i8_:%[0-9]+]]:fpr128 = ORRv16i8 killed [[MULv8i16_indexed]], [[INSERT_SUBREG1]]
97+
; CHECK-NEXT: $q0 = COPY [[ORRv16i8_]]
98+
; CHECK-NEXT: RET_ReallyLR implicit $q0
99+
%1:gpr64common = COPY $x0
100+
%0:fpr128 = COPY $q0
101+
%2:fpr64 = LDRDui %1, 0 :: (load (s64) from %ir.iPtr, align 1)
102+
%3:fpr128 = MOVIv2d_ns 0
103+
%4:fpr64 = COPY %3.dsub
104+
%6:fpr128 = IMPLICIT_DEF
105+
%5:fpr128 = INSERT_SUBREG %6, killed %4, %subreg.dsub
106+
%8:fpr128 = IMPLICIT_DEF
107+
%7:fpr128 = INSERT_SUBREG %8, killed %2, %subreg.dsub
108+
%9:fpr128_lo = INSvi64lane %7, 1, killed %5, 0
109+
%10:fpr128 = MULv8i16_indexed %0, %9, 1
110+
%11:fpr128 = ORRv16i8 killed %10, %9
111+
$q0 = COPY %11
112+
RET_ReallyLR implicit $q0
113+
114+
...

0 commit comments

Comments
 (0)