Skip to content

Commit 7b2e16f

Browse files
authored
[AArch64] Fix scheduling model issue #96394 (#97047)
The NeoverseZeroMove predicate assumes that the first input operand (operand 1) of MOVZWi/MOVZXi is always an immediate, which is not always true: it can also be a non-immediate operand, for example a stack offset. This patch fixes the issue by checking that the operand is an immediate before comparing its value against zero.
1 parent 8eee6d3 commit 7b2e16f

File tree

2 files changed

+55
-2
lines changed

2 files changed

+55
-2
lines changed

llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -60,8 +60,9 @@ def NeoverseZeroMove : MCSchedPredicate<
6060
// MOV Wd, #0
6161
// MOV Xd, #0
6262
CheckAll<[CheckOpcode<[MOVZWi, MOVZXi]>,
63-
CheckAll<[CheckImmOperand<1, 0>,
64-
CheckImmOperand<2, 0>]>]>,
63+
CheckIsImmOperand<1>,
64+
CheckImmOperand<1, 0>,
65+
CheckImmOperand<2, 0>]>,
6566
// MOV Wd, WZR
6667
// MOV Xd, XZR
6768
// MOV Wd, Wn
Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,52 @@
1+
# RUN: llc -run-pass=machine-scheduler -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 %s -o /dev/null 2>&1
2+
# Just ensure this doesn't crash. Checks that, in the neoverse-v2
3+
# scheduling model, we don't attempt to treat the first input
4+
# operand of MOVZXi as an immediate when it is not one.
5+
6+
--- |
7+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
8+
9+
declare void @foo2(<2 x float>) #0
10+
11+
define void @foo1() #0 {
12+
call void @foo2(<2 x float> <float 2.500000e-01, float 7.500000e-01>)
13+
ret void
14+
}
15+
16+
attributes #0 = { "target-cpu"="neoverse-v2" }
17+
18+
...
19+
---
20+
name: foo1
21+
alignment: 16
22+
tracksRegLiveness: true
23+
registers:
24+
- { id: 0, class: gpr64 }
25+
- { id: 1, class: gpr64 }
26+
- { id: 2, class: gpr64common }
27+
- { id: 3, class: gpr64common }
28+
- { id: 4, class: fpr64 }
29+
frameInfo:
30+
maxAlignment: 1
31+
adjustsStack: true
32+
hasCalls: true
33+
maxCallFrameSize: 0
34+
constants:
35+
- id: 0
36+
value: '<2 x i32> <i32 1048576000, i32 1061158912>'
37+
alignment: 8
38+
machineFunctionInfo: {}
39+
body: |
40+
bb.0 (%ir-block.0):
41+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
42+
%2:gpr64common = MOVZXi target-flags(aarch64-g0, aarch64-nc) %const.0, 0
43+
%2:gpr64common = MOVKXi %2, target-flags(aarch64-g1, aarch64-nc) %const.0, 16
44+
%2:gpr64common = MOVKXi %2, target-flags(aarch64-g2, aarch64-nc) %const.0, 32
45+
%2:gpr64common = MOVKXi %2, target-flags(aarch64-g3) %const.0, 48
46+
%4:fpr64 = LDRDui %2, 0 :: (load (s64) from constant-pool)
47+
$d0 = COPY %4
48+
BL @foo2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $d0, implicit-def $sp
49+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
50+
RET_ReallyLR
51+
52+
...

0 commit comments

Comments
 (0)