Skip to content

Commit 0e6ea09

Browse files
committed
[AArch64][GlobalISel] Scalarize zext with larger than i64 elements.
As with other operations on elements larger than i64, we scalarize vectors with i128 elements and allow them to legalize from there. This also helps with v2i64 udiv by constant, which needs to legalize a umulh.
1 parent 6df192c commit 0e6ea09

File tree

5 files changed

+122
-57
lines changed

5 files changed

+122
-57
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
759759
Query.Types[1].getScalarSizeInBits() == 16);
760760
})
761761
.clampMinNumElements(1, s8, 8)
762-
.clampMinNumElements(1, s16, 4);
762+
.clampMinNumElements(1, s16, 4)
763+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
763764

764765
getActionDefinitionsBuilder(G_TRUNC)
765766
.legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})

llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for udiv_v2i64
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
64

75
define <16 x i8> @div16xi8(<16 x i8> %x) {
86
; CHECK-SD-LABEL: div16xi8:
@@ -219,25 +217,42 @@ define <4 x i32> @udiv32xi4(<4 x i32> %x) {
219217
}
220218

221219
define <2 x i64> @udiv_v2i64(<2 x i64> %a) {
222-
; CHECK-LABEL: udiv_v2i64:
223-
; CHECK: // %bb.0:
224-
; CHECK-NEXT: mov x8, #9363 // =0x2493
225-
; CHECK-NEXT: fmov x10, d0
226-
; CHECK-NEXT: mov x9, v0.d[1]
227-
; CHECK-NEXT: movk x8, #37449, lsl #16
228-
; CHECK-NEXT: movk x8, #18724, lsl #32
229-
; CHECK-NEXT: movk x8, #9362, lsl #48
230-
; CHECK-NEXT: umulh x11, x10, x8
231-
; CHECK-NEXT: umulh x8, x9, x8
232-
; CHECK-NEXT: sub x10, x10, x11
233-
; CHECK-NEXT: add x10, x11, x10, lsr #1
234-
; CHECK-NEXT: sub x9, x9, x8
235-
; CHECK-NEXT: add x8, x8, x9, lsr #1
236-
; CHECK-NEXT: lsr x9, x10, #2
237-
; CHECK-NEXT: fmov d0, x9
238-
; CHECK-NEXT: lsr x8, x8, #2
239-
; CHECK-NEXT: mov v0.d[1], x8
240-
; CHECK-NEXT: ret
220+
; CHECK-SD-LABEL: udiv_v2i64:
221+
; CHECK-SD: // %bb.0:
222+
; CHECK-SD-NEXT: mov x8, #9363 // =0x2493
223+
; CHECK-SD-NEXT: fmov x10, d0
224+
; CHECK-SD-NEXT: mov x9, v0.d[1]
225+
; CHECK-SD-NEXT: movk x8, #37449, lsl #16
226+
; CHECK-SD-NEXT: movk x8, #18724, lsl #32
227+
; CHECK-SD-NEXT: movk x8, #9362, lsl #48
228+
; CHECK-SD-NEXT: umulh x11, x10, x8
229+
; CHECK-SD-NEXT: umulh x8, x9, x8
230+
; CHECK-SD-NEXT: sub x10, x10, x11
231+
; CHECK-SD-NEXT: add x10, x11, x10, lsr #1
232+
; CHECK-SD-NEXT: sub x9, x9, x8
233+
; CHECK-SD-NEXT: add x8, x8, x9, lsr #1
234+
; CHECK-SD-NEXT: lsr x9, x10, #2
235+
; CHECK-SD-NEXT: fmov d0, x9
236+
; CHECK-SD-NEXT: lsr x8, x8, #2
237+
; CHECK-SD-NEXT: mov v0.d[1], x8
238+
; CHECK-SD-NEXT: ret
239+
;
240+
; CHECK-GI-LABEL: udiv_v2i64:
241+
; CHECK-GI: // %bb.0:
242+
; CHECK-GI-NEXT: mov x8, #9363 // =0x2493
243+
; CHECK-GI-NEXT: fmov x9, d0
244+
; CHECK-GI-NEXT: mov x10, v0.d[1]
245+
; CHECK-GI-NEXT: movk x8, #37449, lsl #16
246+
; CHECK-GI-NEXT: movk x8, #18724, lsl #32
247+
; CHECK-GI-NEXT: movk x8, #9362, lsl #48
248+
; CHECK-GI-NEXT: umulh x9, x9, x8
249+
; CHECK-GI-NEXT: umulh x8, x10, x8
250+
; CHECK-GI-NEXT: mov v1.d[0], x9
251+
; CHECK-GI-NEXT: mov v1.d[1], x8
252+
; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
253+
; CHECK-GI-NEXT: usra v1.2d, v0.2d, #1
254+
; CHECK-GI-NEXT: ushr v0.2d, v1.2d, #2
255+
; CHECK-GI-NEXT: ret
241256
%r = udiv <2 x i64> %a, splat (i64 7)
242257
ret <2 x i64> %r
243258
}

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck -check-prefixes=CHECK,CHECK-SD %s
3-
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for uabd_i64
3+
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
64

75
define <8 x i16> @sabdl8h(ptr %A, ptr %B) nounwind {
86
; CHECK-LABEL: sabdl8h:
@@ -1803,17 +1801,51 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
18031801
}
18041802

18051803
define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
1806-
; CHECK-LABEL: uabd_i64:
1807-
; CHECK: // %bb.0:
1808-
; CHECK-NEXT: cmgt.2d v2, v0, v1
1809-
; CHECK-NEXT: sub.2d v0, v0, v1
1810-
; CHECK-NEXT: mov x1, xzr
1811-
; CHECK-NEXT: mov x3, xzr
1812-
; CHECK-NEXT: eor.16b v0, v0, v2
1813-
; CHECK-NEXT: sub.2d v0, v2, v0
1814-
; CHECK-NEXT: mov.d x2, v0[1]
1815-
; CHECK-NEXT: fmov x0, d0
1816-
; CHECK-NEXT: ret
1804+
; CHECK-SD-LABEL: uabd_i64:
1805+
; CHECK-SD: // %bb.0:
1806+
; CHECK-SD-NEXT: cmgt.2d v2, v0, v1
1807+
; CHECK-SD-NEXT: sub.2d v0, v0, v1
1808+
; CHECK-SD-NEXT: mov x1, xzr
1809+
; CHECK-SD-NEXT: mov x3, xzr
1810+
; CHECK-SD-NEXT: eor.16b v0, v0, v2
1811+
; CHECK-SD-NEXT: sub.2d v0, v2, v0
1812+
; CHECK-SD-NEXT: mov.d x2, v0[1]
1813+
; CHECK-SD-NEXT: fmov x0, d0
1814+
; CHECK-SD-NEXT: ret
1815+
;
1816+
; CHECK-GI-LABEL: uabd_i64:
1817+
; CHECK-GI: // %bb.0:
1818+
; CHECK-GI-NEXT: mov d2, v0[1]
1819+
; CHECK-GI-NEXT: mov d3, v1[1]
1820+
; CHECK-GI-NEXT: fmov x8, d0
1821+
; CHECK-GI-NEXT: fmov x10, d1
1822+
; CHECK-GI-NEXT: asr x9, x8, #63
1823+
; CHECK-GI-NEXT: fmov x11, d2
1824+
; CHECK-GI-NEXT: fmov x13, d3
1825+
; CHECK-GI-NEXT: asr x12, x10, #63
1826+
; CHECK-GI-NEXT: subs x8, x8, x10
1827+
; CHECK-GI-NEXT: sbc x9, x9, x12
1828+
; CHECK-GI-NEXT: asr x14, x11, #63
1829+
; CHECK-GI-NEXT: asr x15, x13, #63
1830+
; CHECK-GI-NEXT: subs x10, x11, x13
1831+
; CHECK-GI-NEXT: sbc x11, x14, x15
1832+
; CHECK-GI-NEXT: cmp x9, #0
1833+
; CHECK-GI-NEXT: cset w12, lt
1834+
; CHECK-GI-NEXT: csel w12, wzr, w12, eq
1835+
; CHECK-GI-NEXT: cmp x11, #0
1836+
; CHECK-GI-NEXT: cset w13, lt
1837+
; CHECK-GI-NEXT: csel w13, wzr, w13, eq
1838+
; CHECK-GI-NEXT: negs x14, x8
1839+
; CHECK-GI-NEXT: ngc x15, x9
1840+
; CHECK-GI-NEXT: negs x16, x10
1841+
; CHECK-GI-NEXT: ngc x17, x11
1842+
; CHECK-GI-NEXT: tst w12, #0x1
1843+
; CHECK-GI-NEXT: csel x0, x14, x8, ne
1844+
; CHECK-GI-NEXT: csel x1, x15, x9, ne
1845+
; CHECK-GI-NEXT: tst w13, #0x1
1846+
; CHECK-GI-NEXT: csel x2, x16, x10, ne
1847+
; CHECK-GI-NEXT: csel x3, x17, x11, ne
1848+
; CHECK-GI-NEXT: ret
18171849
%aext = sext <2 x i64> %a to <2 x i128>
18181850
%bext = sext <2 x i64> %b to <2 x i128>
18191851
%abdiff = sub nsw <2 x i128> %aext, %bext

llvm/test/CodeGen/AArch64/sext.ll

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for sext_v2i64_v2i128
3+
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
64

75
define i16 @sext_i8_to_i16(i8 %a) {
86
; CHECK-LABEL: sext_i8_to_i16:
@@ -1245,15 +1243,26 @@ entry:
12451243
}
12461244

12471245
define <2 x i128> @sext_v2i64_v2i128(<2 x i64> %a) {
1248-
; CHECK-LABEL: sext_v2i64_v2i128:
1249-
; CHECK: // %bb.0: // %entry
1250-
; CHECK-NEXT: mov x8, v0.d[1]
1251-
; CHECK-NEXT: dup v1.2d, v0.d[1]
1252-
; CHECK-NEXT: fmov x0, d0
1253-
; CHECK-NEXT: fmov x2, d1
1254-
; CHECK-NEXT: asr x1, x0, #63
1255-
; CHECK-NEXT: asr x3, x8, #63
1256-
; CHECK-NEXT: ret
1246+
; CHECK-SD-LABEL: sext_v2i64_v2i128:
1247+
; CHECK-SD: // %bb.0: // %entry
1248+
; CHECK-SD-NEXT: mov x8, v0.d[1]
1249+
; CHECK-SD-NEXT: dup v1.2d, v0.d[1]
1250+
; CHECK-SD-NEXT: fmov x0, d0
1251+
; CHECK-SD-NEXT: fmov x2, d1
1252+
; CHECK-SD-NEXT: asr x1, x0, #63
1253+
; CHECK-SD-NEXT: asr x3, x8, #63
1254+
; CHECK-SD-NEXT: ret
1255+
;
1256+
; CHECK-GI-LABEL: sext_v2i64_v2i128:
1257+
; CHECK-GI: // %bb.0: // %entry
1258+
; CHECK-GI-NEXT: mov d1, v0.d[1]
1259+
; CHECK-GI-NEXT: fmov x8, d0
1260+
; CHECK-GI-NEXT: fmov x0, d0
1261+
; CHECK-GI-NEXT: asr x1, x8, #63
1262+
; CHECK-GI-NEXT: fmov x9, d1
1263+
; CHECK-GI-NEXT: fmov x2, d1
1264+
; CHECK-GI-NEXT: asr x3, x9, #63
1265+
; CHECK-GI-NEXT: ret
12571266
entry:
12581267
%c = sext <2 x i64> %a to <2 x i128>
12591268
ret <2 x i128> %c

llvm/test/CodeGen/AArch64/zext.ll

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

55
; CHECK-GI: warning: Instruction selection used fallback path for zext_v16i10_v16i16
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v2i64_v2i128
76

87
define i16 @zext_i8_to_i16(i8 %a) {
98
; CHECK-LABEL: zext_i8_to_i16:
@@ -1215,13 +1214,22 @@ entry:
12151214
}
12161215

12171216
define <2 x i128> @zext_v2i64_v2i128(<2 x i64> %a) {
1218-
; CHECK-LABEL: zext_v2i64_v2i128:
1219-
; CHECK: // %bb.0: // %entry
1220-
; CHECK-NEXT: mov x2, v0.d[1]
1221-
; CHECK-NEXT: fmov x0, d0
1222-
; CHECK-NEXT: mov x1, xzr
1223-
; CHECK-NEXT: mov x3, xzr
1224-
; CHECK-NEXT: ret
1217+
; CHECK-SD-LABEL: zext_v2i64_v2i128:
1218+
; CHECK-SD: // %bb.0: // %entry
1219+
; CHECK-SD-NEXT: mov x2, v0.d[1]
1220+
; CHECK-SD-NEXT: fmov x0, d0
1221+
; CHECK-SD-NEXT: mov x1, xzr
1222+
; CHECK-SD-NEXT: mov x3, xzr
1223+
; CHECK-SD-NEXT: ret
1224+
;
1225+
; CHECK-GI-LABEL: zext_v2i64_v2i128:
1226+
; CHECK-GI: // %bb.0: // %entry
1227+
; CHECK-GI-NEXT: mov d1, v0.d[1]
1228+
; CHECK-GI-NEXT: fmov x0, d0
1229+
; CHECK-GI-NEXT: mov x1, xzr
1230+
; CHECK-GI-NEXT: mov x3, xzr
1231+
; CHECK-GI-NEXT: fmov x2, d1
1232+
; CHECK-GI-NEXT: ret
12251233
entry:
12261234
%c = zext <2 x i64> %a to <2 x i128>
12271235
ret <2 x i128> %c

0 commit comments

Comments
 (0)