
Commit 5dfaf84

[LLVM][AArch64] Correctly lower funnel shifts by constants. (#140058)
Prevent LowerFunnelShift from creating an invalid ISD::FSHR when lowering "ISD::FSHL X, Y, 0". Such inputs are rare because the operation is a NOP that DAGCombiner will optimise away. However, we should not rely on this, so this PR mirrors the same optimisation.

Ensure LowerFunnelShift normalises constant shift amounts, because isel rules expect them to be in the range [0, src bit length).

NOTE: To simplify testing, this PR also adds a command line option to disable the DAG combiner (-combiner-disabled).
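For context, here is a minimal C++ sketch of the scalar semantics this fix relies on; fshl32/fshr32 are illustrative helpers, not LLVM APIs. Per the LangRef, a funnel-shift amount is taken modulo the bit width, FSHL by 0 is a NOP returning the first operand, and a non-zero FSHL by s equals FSHR by (width - s):

#include <cassert>
#include <cstdint>

// Illustrative scalar semantics of llvm.fshl/llvm.fshr on i32; the shift
// amount is reduced modulo the bit width, as the LangRef specifies.
static uint32_t fshl32(uint32_t a, uint32_t b, uint32_t s) {
  s %= 32;
  return s == 0 ? a : (a << s) | (b >> (32 - s));
}
static uint32_t fshr32(uint32_t a, uint32_t b, uint32_t s) {
  s %= 32;
  return s == 0 ? b : (a << (32 - s)) | (b >> s);
}

int main() {
  // FSHL X, Y, 0 is a NOP returning X; naively rewriting it as
  // "FSHR X, Y, 32 - 0" would request an out-of-range shift of 32.
  assert(fshl32(0xAABBCCDD, 0x11223344, 0) == 0xAABBCCDD);
  // For non-zero s, FSHL by s equals FSHR by (32 - s), which is the
  // rewrite LowerFunnelShift performs.
  assert(fshl32(0xAABBCCDD, 0x11223344, 16) ==
         fshr32(0xAABBCCDD, 0x11223344, 32 - 16));
  // Amounts normalise modulo the bit width: shifting by 33 behaves like 1.
  assert(fshl32(0xAABBCCDD, 0x11223344, 33) ==
         fshl32(0xAABBCCDD, 0x11223344, 1));
  return 0;
}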
1 parent 4060d38 commit 5dfaf84

File tree: 3 files changed, +159 additions, -6 deletions


llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 1 deletion
@@ -149,6 +149,10 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
         cl::desc("DAG combiner enable load/<replace bytes>/store with "
                  "a narrower store"));
 
+static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
+                                     cl::init(false),
+                                     cl::desc("Disable the DAG combiner"));
+
 namespace {
 
 class DAGCombiner {
@@ -248,7 +252,8 @@ namespace {
       STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
       BatchAA(BatchAA) {
     ForCodeSize = DAG.shouldOptForSize();
-    DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
+    DisableGenericCombines =
+        DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
 
     MaximumLegalStoreInBits = 0;
     // We use the minimum store size here, since that's all we can guarantee
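With this change, either the new flag or the existing subtarget hook can switch the generic combines off. Usage mirrors the second RUN line of the new test below:

  llc -combiner-disabled input.ll -o -

Here input.ll stands for any input module; since the option is cl::Hidden it is only listed under --help-hidden.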

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 19 additions & 5 deletions
@@ -7262,20 +7262,34 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
 // FSHL is converted to FSHR before deciding what to do with it
 static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
   SDValue Shifts = Op.getOperand(2);
-  // Check if the shift amount is a constant
+  // Check if the shift amount is a constant and normalise to [0, SrcBitLen)
   // If opcode is FSHL, convert it to FSHR
   if (auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts)) {
     SDLoc DL(Op);
     MVT VT = Op.getSimpleValueType();
+    unsigned int NewShiftNo = ShiftNo->getZExtValue() % VT.getFixedSizeInBits();
 
     if (Op.getOpcode() == ISD::FSHL) {
-      unsigned int NewShiftNo =
-          VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
+      if (NewShiftNo == 0)
+        return Op.getOperand(0);
+
+      NewShiftNo = VT.getFixedSizeInBits() - NewShiftNo;
+      return DAG.getNode(
+          ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
+          DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
+    }
+
+    if (Op.getOpcode() == ISD::FSHR) {
+      if (NewShiftNo == 0)
+        return Op.getOperand(1);
+
+      if (ShiftNo->getZExtValue() == NewShiftNo)
+        return Op;
+
+      // Rewrite using the normalised shift amount.
       return DAG.getNode(
           ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
          DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
-    } else if (Op.getOpcode() == ISD::FSHR) {
-      return Op;
     }
   }
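As a worked example of the new path, a small standalone helper (illustrative only, not part of the patch) computes the FSHR amount that the FSHL rewrite produces; its results line up with the extr immediates checked in the test below:

// Illustrative helper: the constant-amount normalisation LowerFunnelShift
// now applies when turning FSHL into FSHR. Returns the FSHR amount, or 0
// when the FSHL is a NOP that simply returns operand 0.
unsigned fshlToFshrAmount(unsigned BitWidth, uint64_t Amt) {
  unsigned S = Amt % BitWidth;      // normalise to [0, BitWidth)
  return S == 0 ? 0 : BitWidth - S; // non-zero FSHL s == FSHR (width - s)
}
// fshlToFshrAmount(32, 33) == 31  ->  "extr w0, w1, w2, #31"
// fshlToFshrAmount(32, 16) == 16  ->  "extr w0, w1, w2, #16"
// fshlToFshrAmount(64, 65) == 63  ->  "extr x0, x1, x2, #63"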

Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc %s -o - | FileCheck %s
+; RUN: llc -combiner-disabled %s -o - | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Verify lowering code in isolation to ensure we can lower shifts that would
+; normally be optimised away.
+
+define i32 @fshl_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 0)
+  ret i32 %r
+}
+
+define i32 @fshl_i32_by_half_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_half_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #16
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 16)
+  ret i32 %r
+}
+
+define i32 @fshl_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 32)
+  ret i32 %r
+}
+
+define i32 @fshl_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #31
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 33)
+  ret i32 %r
+}
+
+define i64 @fshl_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshl_i64_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 0)
+  ret i64 %r
+}
+
+define i64 @fshl_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshl_i64_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 64)
+  ret i64 %r
+}
+
+define i64 @fshl_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshl_i64_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr x0, x1, x2, #63
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 65)
+  ret i64 %r
+}
+
+define i32 @fshr_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w2
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 0)
+  ret i32 %r
+}
+
+define i32 @fshr_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w2
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 32)
+  ret i32 %r
+}
+
+define i32 @fshr_i32_by_half_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_half_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #16
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 16)
+  ret i32 %r
+}
+
+define i32 @fshr_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #1
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 33)
+  ret i32 %r
+}
+
+define i64 @fshr_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshr_i64_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 0)
+  ret i64 %r
+}
+
+define i64 @fshr_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshr_i64_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 64)
+  ret i64 %r
+}
+
+define i64 @fshr_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshr_i64_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr x0, x1, x2, #1
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 65)
+  ret i64 %r
+}
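For context on the CHECK lines above: AArch64's extr instruction extracts a register-width field from the concatenation of two registers, which is exactly a funnel shift right, so constant-amount FSHR selects to a single extr. A minimal model (extr32 is an illustrative name, not an LLVM API):

#include <cstdint>

// Model of "extr wd, wn, wm, #lsb": bits [lsb+31 : lsb] of the 64-bit
// concatenation wn:wm -- i.e. a 32-bit funnel shift right by lsb.
static uint32_t extr32(uint32_t wn, uint32_t wm, unsigned lsb) {
  uint64_t concat = ((uint64_t)wn << 32) | (uint64_t)wm;
  return (uint32_t)(concat >> lsb); // lsb must be in [0, 31]
}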
