Skip to content

Commit a324d8f

Browse files
committed
[AArch64] Add tests for 128-bit shift variations.
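It looks like there's still some room for optimization: the funnel-shift lowering is more efficient than the lowering of the actual shifts (fshl_i64/fshr_i64 derive the inverted shift amount with a single mvn, while the masked 128-bit shifts need an and followed by an eor).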
1 parent 69feac1 commit a324d8f

File tree

2 files changed

+115
-39
lines changed

2 files changed

+115
-39
lines changed
Lines changed: 89 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,108 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
12
; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s
23

34
; i128 shift left by an unmasked i128 amount. The expected code builds both
; candidate results and selects between them with csel, keyed on the sign of
; (x2 - 64); the low word becomes xzr on the "ge" side.
define i128 @shl(i128 %r, i128 %s) nounwind readnone {
45
; CHECK-LABEL: shl:
5-
; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
6-
; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]]
7-
; CHECK: cmp x2, #0
8-
; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq
9-
; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2
10-
; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]]
11-
; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, x2
12-
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
13-
; CHECK: cmp [[EXTRA_SHIFT]], #0
14-
; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge
15-
; CHECK: csel x0, xzr, [[HI_BIG_SHIFT]], ge
16-
; CHECK: ret
17-
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: neg x8, x2
8+
; CHECK-NEXT: lsr x8, x0, x8
9+
; CHECK-NEXT: cmp x2, #0 // =0
10+
; CHECK-NEXT: csel x8, xzr, x8, eq
11+
; CHECK-NEXT: lsl x9, x1, x2
12+
; CHECK-NEXT: orr x8, x8, x9
13+
; CHECK-NEXT: lsl x9, x0, x2
14+
; CHECK-NEXT: sub x10, x2, #64 // =64
15+
; CHECK-NEXT: cmp x10, #0 // =0
16+
; CHECK-NEXT: csel x1, x9, x8, ge
17+
; CHECK-NEXT: csel x0, xzr, x9, ge
18+
; CHECK-NEXT: ret
1819
%shl = shl i128 %r, %s
1920
ret i128 %shl
2021
}
2122

23+
; i128 shift left where the amount is first masked with 63. The expected code
; contains no cmp/csel: the bits carried from the low word into the high word
; come from (x0 >> 1) >> ((x2 & 63) ^ 63), OR'd with x1 << x2.
define i128 @shl_mask(i128 %r, i128 %s) nounwind readnone {
24+
; CHECK-LABEL: shl_mask:
25+
; CHECK: // %bb.0:
26+
; CHECK-NEXT: lsl x8, x1, x2
27+
; CHECK-NEXT: lsr x9, x0, #1
28+
; CHECK-NEXT: and x10, x2, #0x3f
29+
; CHECK-NEXT: eor x10, x10, #0x3f
30+
; CHECK-NEXT: lsr x9, x9, x10
31+
; CHECK-NEXT: orr x1, x8, x9
32+
; CHECK-NEXT: lsl x0, x0, x2
33+
; CHECK-NEXT: ret
34+
%mask = and i128 %s, 63
35+
%shl = shl i128 %r, %mask
36+
ret i128 %shl
37+
}
38+
2239
; i128 arithmetic shift right by an unmasked i128 amount. The expected code
; selects with csel on the sign of (x2 - 64); on the "ge" side the high word
; is the sign fill produced by asr x1, #63.
define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
2340
; CHECK-LABEL: ashr:
24-
; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
25-
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
26-
; CHECK: cmp x2, #0
27-
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
28-
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
29-
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
30-
; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
31-
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
32-
; CHECK: cmp [[EXTRA_SHIFT]], #0
33-
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
34-
; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
35-
; CHECK: csel x1, [[BIGSHIFT_HI]], [[LO_BIG_SHIFT]], ge
36-
; CHECK: ret
37-
41+
; CHECK: // %bb.0:
42+
; CHECK-NEXT: neg x8, x2
43+
; CHECK-NEXT: lsl x8, x1, x8
44+
; CHECK-NEXT: cmp x2, #0 // =0
45+
; CHECK-NEXT: csel x8, xzr, x8, eq
46+
; CHECK-NEXT: lsr x9, x0, x2
47+
; CHECK-NEXT: orr x8, x9, x8
48+
; CHECK-NEXT: asr x9, x1, x2
49+
; CHECK-NEXT: sub x10, x2, #64 // =64
50+
; CHECK-NEXT: cmp x10, #0 // =0
51+
; CHECK-NEXT: csel x0, x9, x8, ge
52+
; CHECK-NEXT: asr x8, x1, #63
53+
; CHECK-NEXT: csel x1, x8, x9, ge
54+
; CHECK-NEXT: ret
3855
%shr = ashr i128 %r, %s
3956
ret i128 %shr
4057
}
4158

59+
; i128 arithmetic shift right where the amount is first masked with 63. The
; expected code contains no cmp/csel: the bits carried from the high word into
; the low word come from (x1 << 1) << ((x2 & 63) ^ 63), OR'd with x0 >> x2.
define i128 @ashr_mask(i128 %r, i128 %s) nounwind readnone {
60+
; CHECK-LABEL: ashr_mask:
61+
; CHECK: // %bb.0:
62+
; CHECK-NEXT: lsr x8, x0, x2
63+
; CHECK-NEXT: lsl x9, x1, #1
64+
; CHECK-NEXT: and x10, x2, #0x3f
65+
; CHECK-NEXT: eor x10, x10, #0x3f
66+
; CHECK-NEXT: lsl x9, x9, x10
67+
; CHECK-NEXT: orr x0, x8, x9
68+
; CHECK-NEXT: asr x1, x1, x2
69+
; CHECK-NEXT: ret
70+
%mask = and i128 %s, 63
71+
%shr = ashr i128 %r, %mask
72+
ret i128 %shr
73+
}
74+
4275
; i128 logical shift right by an unmasked i128 amount. The expected code
; selects with csel on the sign of (x2 - 64); on the "ge" side the high word
; becomes xzr.
define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
4376
; CHECK-LABEL: lshr:
44-
; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
45-
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
46-
; CHECK: cmp x2, #0
47-
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
48-
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
49-
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
50-
; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
51-
; CHECK: cmp [[EXTRA_SHIFT]], #0
52-
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
53-
; CHECK: csel x1, xzr, [[LO_BIG_SHIFT]], ge
54-
; CHECK: ret
55-
77+
; CHECK: // %bb.0:
78+
; CHECK-NEXT: neg x8, x2
79+
; CHECK-NEXT: lsl x8, x1, x8
80+
; CHECK-NEXT: cmp x2, #0 // =0
81+
; CHECK-NEXT: csel x8, xzr, x8, eq
82+
; CHECK-NEXT: lsr x9, x0, x2
83+
; CHECK-NEXT: orr x8, x9, x8
84+
; CHECK-NEXT: lsr x9, x1, x2
85+
; CHECK-NEXT: sub x10, x2, #64 // =64
86+
; CHECK-NEXT: cmp x10, #0 // =0
87+
; CHECK-NEXT: csel x0, x9, x8, ge
88+
; CHECK-NEXT: csel x1, xzr, x9, ge
89+
; CHECK-NEXT: ret
5690
%shr = lshr i128 %r, %s
5791
ret i128 %shr
5892
}
93+
94+
; i128 logical shift right where the amount is first masked with 63. The
; expected code contains no cmp/csel: the bits carried from the high word into
; the low word come from (x1 << 1) << ((x2 & 63) ^ 63), OR'd with x0 >> x2.
define i128 @lshr_mask(i128 %r, i128 %s) nounwind readnone {
95+
; CHECK-LABEL: lshr_mask:
96+
; CHECK: // %bb.0:
97+
; CHECK-NEXT: lsr x8, x0, x2
98+
; CHECK-NEXT: lsl x9, x1, #1
99+
; CHECK-NEXT: and x10, x2, #0x3f
100+
; CHECK-NEXT: eor x10, x10, #0x3f
101+
; CHECK-NEXT: lsl x9, x9, x10
102+
; CHECK-NEXT: orr x0, x8, x9
103+
; CHECK-NEXT: lsr x1, x1, x2
104+
; CHECK-NEXT: ret
105+
%mask = and i128 %s, 63
106+
%shr = lshr i128 %r, %mask
107+
ret i128 %shr
108+
}

llvm/test/CodeGen/AArch64/funnel-shift.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,19 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
2929
ret i32 %f
3030
}
3131

32+
; 64-bit funnel shift left through the llvm.fshl.i64 intrinsic. The expected
; code inverts the shift amount with one mvn and applies it to (x1 >> 1),
; then ORs that with x0 << x2.
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
33+
; CHECK-LABEL: fshl_i64:
34+
; CHECK: // %bb.0:
35+
; CHECK-NEXT: mvn w9, w2
36+
; CHECK-NEXT: lsr x10, x1, #1
37+
; CHECK-NEXT: lsl x8, x0, x2
38+
; CHECK-NEXT: lsr x9, x10, x9
39+
; CHECK-NEXT: orr x0, x8, x9
40+
; CHECK-NEXT: ret
41+
%f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
42+
ret i64 %f
43+
}
44+
3245
; Verify that weird types are minimally supported.
3346
declare i37 @llvm.fshl.i37(i37, i37, i37)
3447
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
@@ -153,6 +166,19 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
153166
ret i32 %f
154167
}
155168

169+
; 64-bit funnel shift right through the llvm.fshr.i64 intrinsic. The expected
; code inverts the shift amount with one mvn and applies it to (x0 << 1),
; then ORs that with x1 >> x2.
define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
170+
; CHECK-LABEL: fshr_i64:
171+
; CHECK: // %bb.0:
172+
; CHECK-NEXT: mvn w9, w2
173+
; CHECK-NEXT: lsl x10, x0, #1
174+
; CHECK-NEXT: lsr x8, x1, x2
175+
; CHECK-NEXT: lsl x9, x10, x9
176+
; CHECK-NEXT: orr x0, x9, x8
177+
; CHECK-NEXT: ret
178+
%f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
179+
ret i64 %f
180+
}
181+
156182
; Verify that weird types are minimally supported.
157183
declare i37 @llvm.fshr.i37(i37, i37, i37)
158184
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {

0 commit comments

Comments
 (0)