Skip to content

Commit 0e49c54

Browse files
committed
[AArch64] Fix selection of G_UNMERGE <2 x s16>
Differential revision: https://reviews.llvm.org/D106007
1 parent df538fd commit 0e49c54

File tree

2 files changed

+55
-1
lines changed

2 files changed

+55
-1
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3988,6 +3988,13 @@ bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
39883988
} else {
39893989
// No. We have to perform subregister inserts. For each insert, create an
39903990
// implicit def and a subregister insert, and save the register we create.
3991+
const TargetRegisterClass *RC =
3992+
getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
3993+
WideTy.getScalarSizeInBits() * NumElts);
3994+
unsigned SubReg = 0;
3995+
bool Found = getSubRegForClass(RC, TRI, SubReg);
3996+
(void)Found;
3997+
assert(Found && "expected to find last operand's subreg idx");
39913998
for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
39923999
Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
39934000
MachineInstr &ImpDefMI =
@@ -4001,7 +4008,7 @@ bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
40014008
TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
40024009
.addUse(ImpDefReg)
40034010
.addUse(SrcReg)
4004-
.addImm(AArch64::dsub);
4011+
.addImm(SubReg);
40054012

40064013
constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
40074014
constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
ret <4 x float> %a
1212
}
1313

14+
define <2 x half> @test_v2s16_unmerge(<2 x half> %a) {
15+
ret <2 x half> %a
16+
}
17+
1418
define <4 x half> @test_v4s16_unmerge(<4 x half> %a) {
1519
ret <4 x half> %a
1620
}
@@ -87,6 +91,49 @@ body: |
8791
RET_ReallyLR implicit $q0
8892
...
8993
---
94+
name: test_v2s16_unmerge
95+
legalized: true
96+
regBankSelected: true
97+
tracksRegLiveness: true
98+
registers:
99+
- { id: 0, class: fpr }
100+
- { id: 1, class: fpr }
101+
- { id: 2, class: fpr }
102+
- { id: 3, class: fpr }
103+
- { id: 4, class: fpr }
104+
- { id: 5, class: fpr }
105+
body: |
106+
bb.1 (%ir-block.0):
107+
liveins: $s0
108+
; CHECK-LABEL: name: test_v2s16_unmerge
109+
110+
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
111+
%0:fpr(<2 x s16>) = COPY $s0
112+
113+
; Since 2 * 16 != 128, we need to widen using implicit defs.
114+
; Note that we expect to reuse one of the INSERT_SUBREG results, as CPYi16
115+
; expects a lane > 0.
116+
; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
117+
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
118+
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[INSERT_SUBREG]].hsub
119+
; CHECK: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1
120+
%2:fpr(s16), %3:fpr(s16) = G_UNMERGE_VALUES %0(<2 x s16>)
121+
122+
; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
123+
; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub
124+
; CHECK: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
125+
; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[CPYi16_]], %subreg.hsub
126+
; CHECK: [[INSvi16lane:%[0-9]+]]:fpr128 = INSvi16lane [[INSERT_SUBREG1]], 1, [[INSERT_SUBREG2]], 0
127+
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[INSvi16lane]].ssub
128+
%1:fpr(<2 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16)
129+
130+
; CHECK: $s0 = COPY [[COPY2]]
131+
$s0 = COPY %1(<2 x s16>)
132+
133+
; CHECK: RET_ReallyLR implicit $s0
134+
RET_ReallyLR implicit $s0
135+
...
136+
---
90137
name: test_v4s16_unmerge
91138
alignment: 4
92139
legalized: true

0 commit comments

Comments
 (0)