Skip to content

Commit 00efb34

Browse files
committed
[AArch64][GlobalISel] Fix crash during G_SHUFFLE_VECTOR legalization.
A new widening rule was running before the shuffle was canonicalized into a homogeneous form. Moving the rules around so that the canonicalization is done before the widening fixes the crash, although this particular test still falls back.
1 parent 50bdc6f commit 00efb34

File tree

2 files changed

+38
-6
lines changed

2 files changed

+38
-6
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -956,18 +956,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
956956
},
957957
changeTo(1, 0))
958958
.moreElementsToNextPow2(0)
959-
.widenScalarOrEltToNextPow2OrMinSize(0, 8)
960-
.clampNumElements(0, v8s8, v16s8)
961-
.clampNumElements(0, v4s16, v8s16)
962-
.clampNumElements(0, v4s32, v4s32)
963-
.clampNumElements(0, v2s64, v2s64)
964959
.moreElementsIf(
965960
[](const LegalityQuery &Query) {
966961
return Query.Types[0].isVector() && Query.Types[1].isVector() &&
967962
Query.Types[0].getNumElements() <
968963
Query.Types[1].getNumElements();
969964
},
970-
changeTo(0, 1));
965+
changeTo(0, 1))
966+
.widenScalarOrEltToNextPow2OrMinSize(0, 8)
967+
.clampNumElements(0, v8s8, v16s8)
968+
.clampNumElements(0, v4s16, v8s16)
969+
.clampNumElements(0, v4s32, v4s32)
970+
.clampNumElements(0, v2s64, v2s64);
971971

972972
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
973973
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -global-isel-abort=2 -global-isel -o - %s | FileCheck %s
3+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
4+
target triple = "arm64-apple-macosx11.0.0"
5+
6+
declare i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32>) #0
7+
8+
; This test currently falls back but ensures we don't crash.
9+
10+
define i32 @bar() {
11+
; CHECK-LABEL: bar:
12+
; CHECK: ; %bb.0: ; %bb
13+
; CHECK-NEXT: movi.2d v0, #0000000000000000
14+
; CHECK-NEXT: addv.4s s0, v0
15+
; CHECK-NEXT: fmov w0, s0
16+
; CHECK-NEXT: ret
17+
bb:
18+
%shufflevector = shufflevector <8 x i1> zeroinitializer, <8 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19+
%zext = zext <4 x i1> %shufflevector to <4 x i32>
20+
%call = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %zext)
21+
%icmp = icmp eq i32 %call, 0
22+
br i1 %icmp, label %bb1, label %bb2
23+
24+
bb1: ; preds = %bb2, %bb
25+
ret i32 %call
26+
27+
bb2: ; preds = %bb
28+
%sext = sext i32 0 to i64
29+
br label %bb1
30+
}
31+
32+
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }

0 commit comments

Comments
 (0)