Skip to content

Commit 71d6b0b

Browse files
authored
[AArch64][GlobalISel] Lower shuffle vector with scalar destinations. (#121384)
I believe these are usually canonicalized to vector extracts, but under -O0 we might trigger failures in the widening code if we do not handle scalar destinations correctly. The simplest solution is to lower the shuffle to an extract. Fixes #121365.
1 parent 5056a4b commit 71d6b0b

File tree

3 files changed

+76
-3
lines changed

3 files changed

+76
-3
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,10 +1062,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10621062
return llvm::is_contained(
10631063
{v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
10641064
})
1065-
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
1066-
// just want those lowered into G_BUILD_VECTOR
1065+
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
1066+
// destinations, we just want those lowered into G_BUILD_VECTOR or
1067+
// G_EXTRACT_VECTOR_ELT.
10671068
.lowerIf([=](const LegalityQuery &Query) {
1068-
return !Query.Types[1].isVector();
1069+
return !Query.Types[0].isVector() || !Query.Types[1].isVector();
10691070
})
10701071
.moreElementsIf(
10711072
[](const LegalityQuery &Query) {
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple aarch64 -O0 -global-isel -o - %s | FileCheck %s
3+
4+
define <1 x i1> @shuffle_extract_4(<8 x i1> %a, <8 x i1> %b) {
5+
; CHECK-LABEL: shuffle_extract_4:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
8+
; CHECK-NEXT: umov w8, v0.h[4]
9+
; CHECK-NEXT: and w0, w8, #0x1
10+
; CHECK-NEXT: ret
11+
%extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 4>
12+
ret <1 x i1> %extractvec60
13+
}
14+
15+
define <1 x i1> @shuffle_extract_12(<8 x i1> %a, <8 x i1> %b) {
16+
; CHECK-LABEL: shuffle_extract_12:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: ushll v0.8h, v1.8b, #0
19+
; CHECK-NEXT: umov w8, v0.h[4]
20+
; CHECK-NEXT: and w0, w8, #0x1
21+
; CHECK-NEXT: ret
22+
%extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 12>
23+
ret <1 x i1> %extractvec60
24+
}
25+
26+
define <1 x i1> @shuffle_extract_p(<8 x i1> %a, <8 x i1> %b) {
27+
; CHECK-LABEL: shuffle_extract_p:
28+
; CHECK: // %bb.0:
29+
; CHECK-NEXT: // implicit-def: $w8
30+
; CHECK-NEXT: and w0, w8, #0x1
31+
; CHECK-NEXT: ret
32+
%extractvec60 = shufflevector <8 x i1> %a, <8 x i1> %b, <1 x i32> <i32 poison>
33+
ret <1 x i1> %extractvec60
34+
}
35+
36+
define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) {
37+
; CHECK-LABEL: shufflevector_v1i32:
38+
; CHECK: // %bb.0:
39+
; CHECK-NEXT: fmov d0, d1
40+
; CHECK-NEXT: ret
41+
%c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1>
42+
ret <1 x i32> %c
43+
}

llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,3 +618,32 @@ body: |
618618
RET_ReallyLR implicit $q0
619619
620620
...
621+
---
622+
name: shuffle_v8i1_v1i8
623+
alignment: 4
624+
tracksRegLiveness: true
625+
body: |
626+
bb.1:
627+
liveins: $d0, $d1
628+
; CHECK-LABEL: name: shuffle_v8i1_v1i8
629+
; CHECK: liveins: $d0, $d1
630+
; CHECK-NEXT: {{ $}}
631+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d1
632+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
633+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[COPY]](<8 x s8>)
634+
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[ANYEXT]](<8 x s16>), [[C]](s64)
635+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
636+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
637+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
638+
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
639+
; CHECK-NEXT: RET_ReallyLR implicit $w0
640+
%2:_(<8 x s8>) = COPY $d0
641+
%0:_(<8 x s1>) = G_TRUNC %2:_(<8 x s8>)
642+
%3:_(<8 x s8>) = COPY $d1
643+
%1:_(<8 x s1>) = G_TRUNC %3:_(<8 x s8>)
644+
%4:_(s1) = G_SHUFFLE_VECTOR %0:_(<8 x s1>), %1:_, shufflemask(12)
645+
%5:_(s8) = G_ZEXT %4:_(s1)
646+
%6:_(s32) = G_ANYEXT %5:_(s8)
647+
$w0 = COPY %6:_(s32)
648+
RET_ReallyLR implicit $w0
649+
...

0 commit comments

Comments
 (0)