Skip to content

Commit f7b36b3

Browse files
committed
[AArch64][GlobalISel] Manually select G_DUP with s8/s16 gpr scalar operands.
These are not selected by the imported patterns, and avoiding generating them in the first place is more hassle than it is worth (at least until we have fp types in GlobalISel).
1 parent ade6fa4 commit f7b36b3

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2971,6 +2971,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
29712971
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
29722972
}
29732973
}
2974+
case AArch64::G_DUP: {
2975+
// When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
2976+
// imported patterns. Do it manually here. Avoiding generating s16 gpr is
2977+
// difficult because at RBS we may end up pessimizing the fpr case if we
2978+
// decide to add an anyextend to fix this. Manual selection is the most
2979+
// robust solution for now.
2980+
Register SrcReg = I.getOperand(1).getReg();
2981+
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
2982+
return false; // We expect the fpr regbank case to be imported.
2983+
LLT SrcTy = MRI.getType(SrcReg);
2984+
if (SrcTy.getSizeInBits() == 16)
2985+
I.setDesc(TII.get(AArch64::DUPv8i16gpr));
2986+
else if (SrcTy.getSizeInBits() == 8)
2987+
I.setDesc(TII.get(AArch64::DUPv16i8gpr));
2988+
else
2989+
return false;
2990+
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2991+
}
29742992
case TargetOpcode::G_INTRINSIC_TRUNC:
29752993
return selectIntrinsicTrunc(I, MRI);
29762994
case TargetOpcode::G_INTRINSIC_ROUND:

llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,29 @@ body: |
223223
$q0 = COPY %dup(<8 x s16>)
224224
RET_ReallyLR implicit $q0
225225
226+
...
227+
---
228+
name: DUPv8i16gpr_s16_src
229+
alignment: 4
230+
legalized: true
231+
regBankSelected: true
232+
tracksRegLiveness: true
233+
body: |
234+
bb.0.entry:
235+
liveins: $w0
236+
; Checks that we can still select the gpr variant if the scalar is an s16.
237+
; CHECK-LABEL: name: DUPv8i16gpr_s16_src
238+
; CHECK: liveins: $w0
239+
; CHECK: %copy:gpr32 = COPY $w0
240+
; CHECK: %dup:fpr128 = DUPv8i16gpr %copy
241+
; CHECK: $q0 = COPY %dup
242+
; CHECK: RET_ReallyLR implicit $q0
243+
%copy:gpr(s32) = COPY $w0
244+
%trunc:gpr(s16) = G_TRUNC %copy
245+
%dup:fpr(<8 x s16>) = G_DUP %trunc(s16)
246+
$q0 = COPY %dup(<8 x s16>)
247+
RET_ReallyLR implicit $q0
248+
226249
...
227250
---
228251
name: DUPv8i8gpr
@@ -264,3 +287,26 @@ body: |
264287
%dup:fpr(<16 x s8>) = G_DUP %copy(s32)
265288
$q0 = COPY %dup(<16 x s8>)
266289
RET_ReallyLR implicit $q0
290+
...
291+
---
292+
name: DUPv16i8gpr_s8_src
293+
alignment: 4
294+
legalized: true
295+
regBankSelected: true
296+
tracksRegLiveness: true
297+
body: |
298+
bb.0.entry:
299+
liveins: $w0
300+
; Check we still select the gpr variant when scalar is an s8.
301+
; CHECK-LABEL: name: DUPv16i8gpr_s8_src
302+
; CHECK: liveins: $w0
303+
; CHECK: %copy:gpr32 = COPY $w0
304+
; CHECK: %dup:fpr128 = DUPv16i8gpr %copy
305+
; CHECK: $q0 = COPY %dup
306+
; CHECK: RET_ReallyLR implicit $q0
307+
%copy:gpr(s32) = COPY $w0
308+
%trunc:gpr(s8) = G_TRUNC %copy
309+
%dup:fpr(<16 x s8>) = G_DUP %trunc(s8)
310+
$q0 = COPY %dup(<16 x s8>)
311+
RET_ReallyLR implicit $q0
312+
...

0 commit comments

Comments
 (0)