Commit 2bfdff8

[LLVM][AArch64] Correctly lower funnel shifts by constants.
Prevent LowerFunnelShift from creating an invalid ISD::FSHR when lowering "ISD::FSHL X, Y, 0". Such inputs are rare because the operation is a NOP that DAGCombiner will optimise away; however, we should not rely on that, so this PR mirrors the same optimisation in the lowering code.

Ensure LowerFunnelShift normalises constant shift amounts, because the isel rules expect them to be in the range [0, src bit length).

NOTE: To simplify testing, this PR also adds a command line option to disable the DAG combiner (-combiner-disabled).
1 parent 8dd0065 commit 2bfdff8
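
The fix leans on the defined semantics of the funnel-shift intrinsics: the shift amount is interpreted modulo the operand bit width, so a shift by 0 or by the bit width is a NOP that returns the first operand (fshl) or the second (fshr). As a quick reference, here is a minimal C++ model of those semantics for 32-bit operands; the helper names and the 32-bit specialisation are illustrative only and not part of the patch.

#include <cstdint>

// Reference model of llvm.fshl.i32 / llvm.fshr.i32: the two operands are
// conceptually concatenated (A high, B low) and the amount wraps at 32.
constexpr uint32_t fshl32(uint32_t A, uint32_t B, uint32_t C) {
  unsigned Amt = C % 32;                    // shift amounts wrap at the bit width
  return Amt == 0 ? A                       // by 0, 32, 64, ...: a NOP
                  : (A << Amt) | (B >> (32 - Amt));
}

constexpr uint32_t fshr32(uint32_t A, uint32_t B, uint32_t C) {
  unsigned Amt = C % 32;
  return Amt == 0 ? B : (A << (32 - Amt)) | (B >> Amt);
}

// The cases the commit message calls out: by-zero/by-bit-width are NOPs, and
// an fshl by C is the same as an fshr by (32 - C % 32).
static_assert(fshl32(0x12345678u, 0x9abcdef0u, 0) == 0x12345678u);
static_assert(fshl32(0x12345678u, 0x9abcdef0u, 32) == 0x12345678u);
static_assert(fshr32(0x12345678u, 0x9abcdef0u, 32) == 0x9abcdef0u);
static_assert(fshl32(0x12345678u, 0x9abcdef0u, 33) ==
              fshr32(0x12345678u, 0x9abcdef0u, 31));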

File tree

2 files changed: +131, -6 lines


llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 15 additions & 6 deletions
@@ -7259,21 +7259,30 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
 // FSHL is converted to FSHR before deciding what to do with it
 static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
   SDValue Shifts = Op.getOperand(2);
-  // Check if the shift amount is a constant
+  // Check if the shift amount is a constant and normalise to [0, SrcBitLen)
   // If opcode is FSHL, convert it to FSHR
   if (auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts)) {
     SDLoc DL(Op);
     MVT VT = Op.getSimpleValueType();
+    unsigned int NewShiftNo = ShiftNo->getZExtValue() % VT.getFixedSizeInBits();
 
     if (Op.getOpcode() == ISD::FSHL) {
-      unsigned int NewShiftNo =
-          VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
+      if (NewShiftNo == 0)
+        return Op.getOperand(0);
+
+      NewShiftNo = VT.getFixedSizeInBits() - NewShiftNo;
+    } else if (Op.getOpcode() == ISD::FSHR) {
+      if (NewShiftNo == 0)
+        return Op.getOperand(1);
+
+      if (ShiftNo->getZExtValue() == NewShiftNo)
+        return Op;
+    }
+
+    if (ShiftNo->getZExtValue() != NewShiftNo)
       return DAG.getNode(
           ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
           DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
-    } else if (Op.getOpcode() == ISD::FSHR) {
-      return Op;
-    }
   }
 
   return SDValue();
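
In short, the updated lowering reduces the constant modulo the bit width, returns the untouched operand for the NOP cases rather than building an FSHR with a zero or out-of-range amount, and converts any remaining FSHL into the FSHR form that the EXTR isel patterns expect. A standalone sketch of that rule follows, assuming a plain unsigned/int helper is a fair stand-in for the SDValue plumbing; the function name and the -1 "NOP" convention are this note's own, not part of the patch.

// Bits stands in for VT.getFixedSizeInBits(); the return value is the FSHR
// amount in [1, Bits) that isel should see, or -1 for the NOP cases where the
// lowering now returns operand 0 (FSHL) or operand 1 (FSHR) directly.
constexpr int normalisedFshrAmount(bool IsFshl, unsigned Amount, unsigned Bits) {
  unsigned New = Amount % Bits;    // bring the constant into [0, Bits)
  if (New == 0)
    return -1;                     // shift by 0 or a multiple of Bits: a NOP
  return IsFshl ? int(Bits - New)  // FSHL by New is FSHR by Bits - New
                : int(New);
}

static_assert(normalisedFshrAmount(/*IsFshl=*/true, 0, 32) == -1);
static_assert(normalisedFshrAmount(/*IsFshl=*/true, 33, 32) == 31);
static_assert(normalisedFshrAmount(/*IsFshl=*/false, 64, 64) == -1);
static_assert(normalisedFshrAmount(/*IsFshl=*/false, 65, 64) == 1);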
Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc %s -o - | FileCheck %s
+; RUN: llc -combiner-disabled %s -o - | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Verify lowering code in isolation to ensure we can lower shifts that would
+; normally be optimised away.
+
+define i32 @fshl_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 0)
+  ret i32 %r
+}
+
+define i32 @fshl_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 32)
+  ret i32 %r
+}
+
+define i32 @fshl_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #31
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 33)
+  ret i32 %r
+}
+
+define i64 @fshl_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshl_i64_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 0)
+  ret i64 %r
+}
+
+define i64 @fshl_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshl_i64_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 64)
+  ret i64 %r
+}
+
+define i64 @fshl_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshl_i64_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr x0, x1, x2, #63
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 65)
+  ret i64 %r
+}
+
+define i32 @fshr_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w2
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 0)
+  ret i32 %r
+}
+
+define i32 @fshr_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w2
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 32)
+  ret i32 %r
+}
+
+define i32 @fshr_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #1
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 33)
+  ret i32 %r
+}
+
+define i64 @fshr_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshr_i64_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 0)
+  ret i64 %r
+}
+
+define i64 @fshr_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshr_i64_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 64)
+  ret i64 %r
+}
+
+define i64 @fshr_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshr_i64_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr x0, x1, x2, #1
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 65)
+  ret i64 %r
+}
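
For context on the CHECK lines: AArch64's EXTR extracts a register-width field from the concatenation of its two source registers starting at the immediate bit position, which is exactly a funnel shift right by that immediate. That is why fshl by 33 on i32 lowers to extr ... #31 and fshr by 33 to extr ... #1 (and #63/#1 for the i64 cases). A rough 32-bit model, for illustration only; the helper name and test values below are this note's own, not from the patch.

#include <cstdint>

// Rough model of the 32-bit EXTR form: bits [Lsb+31 : Lsb] of the
// concatenation Rn:Rm, i.e. a funnel shift right by Lsb. Assumes 0 < Lsb < 32.
constexpr uint32_t extr32(uint32_t Rn, uint32_t Rm, unsigned Lsb) {
  return (Rn << (32 - Lsb)) | (Rm >> Lsb);
}

// fshl(a, b, 33) on i32 == fshl(a, b, 1) == fshr(a, b, 31) == extr a, b, #31
// fshr(a, b, 33) on i32 == fshr(a, b, 1)                   == extr a, b, #1
static_assert(extr32(0x12345678u, 0x9abcdef0u, 31) == 0x2468acf1u);
static_assert(extr32(0x12345678u, 0x9abcdef0u, 1) == 0x4d5e6f78u);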
