Skip to content

Commit 2e897a9

Browse files
committed
[X86][BtVer2] Use ReadAfterLd entries for the register operands of CMPXCHG.
This is a follow-up to r369365. llvm-svn: 369412
1 parent 2e68e4d commit 2e897a9

File tree

2 files changed

+298
-3
lines changed

2 files changed

+298
-3
lines changed

llvm/lib/Target/X86/X86ScheduleBtVer2.td

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -365,9 +365,18 @@ def JWriteCMPXCHGVariant : SchedWriteVariant<[
365365
SchedVar<MCSchedPredicate<IsRegRegCompareAndSwap_8>, [JWriteCMPXCHG8rr]>,
366366
SchedVar<NoSchedPred, [WriteCMPXCHG]>
367367
]>;
368-
def : InstRW<[JWriteCMPXCHGVariant], (instrs CMPXCHG8rr, LCMPXCHG8, CMPXCHG8rm,
369-
CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm,
370-
LCMPXCHG16, LCMPXCHG32, LCMPXCHG64,
368+
369+
// The first five reads are contributed by the memory operand (base, scale, index, displacement, segment).
370+
// We ignore those reads and set a read-advance for the other input operands
371+
// including the implicit read of RAX.
372+
def : InstRW<[JWriteCMPXCHGVariant,
373+
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
374+
ReadAfterLd, ReadAfterLd], (instrs LCMPXCHG8, LCMPXCHG16,
375+
LCMPXCHG32, LCMPXCHG64,
376+
CMPXCHG8rm, CMPXCHG16rm,
377+
CMPXCHG32rm, CMPXCHG64rm )>;
378+
379+
def : InstRW<[JWriteCMPXCHGVariant], (instrs CMPXCHG8rr, CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr,
371380
CMPXCHG8B, CMPXCHG16B,
372381
LCMPXCHG8B, LCMPXCHG16B)>;
373382

Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline < %s | FileCheck %s
3+
4+
# LLVM-MCA-BEGIN
5+
imul %rax, %rax
6+
cmpxchgq %rcx, (%rdx)
7+
# LLVM-MCA-END
8+
9+
# LLVM-MCA-BEGIN
10+
imul %rcx, %rcx
11+
cmpxchgq %rcx, (%rdx)
12+
# LLVM-MCA-END
13+
14+
# LLVM-MCA-BEGIN
15+
imul %rax, %rax
16+
lock cmpxchgq %rcx, (%rdx)
17+
# LLVM-MCA-END
18+
19+
# LLVM-MCA-BEGIN
20+
imul %rcx, %rcx
21+
lock cmpxchgq %rcx, (%rdx)
22+
# LLVM-MCA-END
23+
24+
# CHECK: [0] Code Region
25+
26+
# CHECK: Iterations: 1
27+
# CHECK-NEXT: Instructions: 2
28+
# CHECK-NEXT: Total Cycles: 17
29+
# CHECK-NEXT: Total uOps: 8
30+
31+
# CHECK: Dispatch Width: 2
32+
# CHECK-NEXT: uOps Per Cycle: 0.47
33+
# CHECK-NEXT: IPC: 0.12
34+
# CHECK-NEXT: Block RThroughput: 4.0
35+
36+
# CHECK: Instruction Info:
37+
# CHECK-NEXT: [1]: #uOps
38+
# CHECK-NEXT: [2]: Latency
39+
# CHECK-NEXT: [3]: RThroughput
40+
# CHECK-NEXT: [4]: MayLoad
41+
# CHECK-NEXT: [5]: MayStore
42+
# CHECK-NEXT: [6]: HasSideEffects (U)
43+
44+
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
45+
# CHECK-NEXT: 2 6 4.00 imulq %rax, %rax
46+
# CHECK-NEXT: 6 11 1.50 * * cmpxchgq %rcx, (%rdx)
47+
48+
# CHECK: Resources:
49+
# CHECK-NEXT: [0] - JALU0
50+
# CHECK-NEXT: [1] - JALU1
51+
# CHECK-NEXT: [2] - JDiv
52+
# CHECK-NEXT: [3] - JFPA
53+
# CHECK-NEXT: [4] - JFPM
54+
# CHECK-NEXT: [5] - JFPU0
55+
# CHECK-NEXT: [6] - JFPU1
56+
# CHECK-NEXT: [7] - JLAGU
57+
# CHECK-NEXT: [8] - JMul
58+
# CHECK-NEXT: [9] - JSAGU
59+
# CHECK-NEXT: [10] - JSTC
60+
# CHECK-NEXT: [11] - JVALU0
61+
# CHECK-NEXT: [12] - JVALU1
62+
# CHECK-NEXT: [13] - JVIMUL
63+
64+
# CHECK: Resource pressure per iteration:
65+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
66+
# CHECK-NEXT: 3.00 1.00 - - - - - 1.00 4.00 1.00 - - - -
67+
68+
# CHECK: Resource pressure by instruction:
69+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
70+
# CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - imulq %rax, %rax
71+
# CHECK-NEXT: 3.00 - - - - - - 1.00 - 1.00 - - - - cmpxchgq %rcx, (%rdx)
72+
73+
# CHECK: Timeline view:
74+
# CHECK-NEXT: 0123456
75+
# CHECK-NEXT: Index 0123456789
76+
77+
# CHECK: [0,0] DeeeeeeER . .. imulq %rax, %rax
78+
# CHECK-NEXT: [0,1] .D==eeeeeeeeeeeER cmpxchgq %rcx, (%rdx)
79+
80+
# CHECK: Average Wait times (based on the timeline view):
81+
# CHECK-NEXT: [0]: Executions
82+
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
83+
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
84+
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
85+
86+
# CHECK: [0] [1] [2] [3]
87+
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rax
88+
# CHECK-NEXT: 1. 1 3.0 0.0 0.0 cmpxchgq %rcx, (%rdx)
89+
90+
# CHECK: [1] Code Region
91+
92+
# CHECK: Iterations: 1
93+
# CHECK-NEXT: Instructions: 2
94+
# CHECK-NEXT: Total Cycles: 17
95+
# CHECK-NEXT: Total uOps: 8
96+
97+
# CHECK: Dispatch Width: 2
98+
# CHECK-NEXT: uOps Per Cycle: 0.47
99+
# CHECK-NEXT: IPC: 0.12
100+
# CHECK-NEXT: Block RThroughput: 4.0
101+
102+
# CHECK: Instruction Info:
103+
# CHECK-NEXT: [1]: #uOps
104+
# CHECK-NEXT: [2]: Latency
105+
# CHECK-NEXT: [3]: RThroughput
106+
# CHECK-NEXT: [4]: MayLoad
107+
# CHECK-NEXT: [5]: MayStore
108+
# CHECK-NEXT: [6]: HasSideEffects (U)
109+
110+
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
111+
# CHECK-NEXT: 2 6 4.00 imulq %rcx, %rcx
112+
# CHECK-NEXT: 6 11 1.50 * * cmpxchgq %rcx, (%rdx)
113+
114+
# CHECK: Resources:
115+
# CHECK-NEXT: [0] - JALU0
116+
# CHECK-NEXT: [1] - JALU1
117+
# CHECK-NEXT: [2] - JDiv
118+
# CHECK-NEXT: [3] - JFPA
119+
# CHECK-NEXT: [4] - JFPM
120+
# CHECK-NEXT: [5] - JFPU0
121+
# CHECK-NEXT: [6] - JFPU1
122+
# CHECK-NEXT: [7] - JLAGU
123+
# CHECK-NEXT: [8] - JMul
124+
# CHECK-NEXT: [9] - JSAGU
125+
# CHECK-NEXT: [10] - JSTC
126+
# CHECK-NEXT: [11] - JVALU0
127+
# CHECK-NEXT: [12] - JVALU1
128+
# CHECK-NEXT: [13] - JVIMUL
129+
130+
# CHECK: Resource pressure per iteration:
131+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
132+
# CHECK-NEXT: 3.00 1.00 - - - - - 1.00 4.00 1.00 - - - -
133+
134+
# CHECK: Resource pressure by instruction:
135+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
136+
# CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - imulq %rcx, %rcx
137+
# CHECK-NEXT: 3.00 - - - - - - 1.00 - 1.00 - - - - cmpxchgq %rcx, (%rdx)
138+
139+
# CHECK: Timeline view:
140+
# CHECK-NEXT: 0123456
141+
# CHECK-NEXT: Index 0123456789
142+
143+
# CHECK: [0,0] DeeeeeeER . .. imulq %rcx, %rcx
144+
# CHECK-NEXT: [0,1] .D==eeeeeeeeeeeER cmpxchgq %rcx, (%rdx)
145+
146+
# CHECK: Average Wait times (based on the timeline view):
147+
# CHECK-NEXT: [0]: Executions
148+
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
149+
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
150+
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
151+
152+
# CHECK: [0] [1] [2] [3]
153+
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rcx, %rcx
154+
# CHECK-NEXT: 1. 1 3.0 0.0 0.0 cmpxchgq %rcx, (%rdx)
155+
156+
# CHECK: [2] Code Region
157+
158+
# CHECK: Iterations: 1
159+
# CHECK-NEXT: Instructions: 2
160+
# CHECK-NEXT: Total Cycles: 23
161+
# CHECK-NEXT: Total uOps: 8
162+
163+
# CHECK: Dispatch Width: 2
164+
# CHECK-NEXT: uOps Per Cycle: 0.35
165+
# CHECK-NEXT: IPC: 0.09
166+
# CHECK-NEXT: Block RThroughput: 17.0
167+
168+
# CHECK: Instruction Info:
169+
# CHECK-NEXT: [1]: #uOps
170+
# CHECK-NEXT: [2]: Latency
171+
# CHECK-NEXT: [3]: RThroughput
172+
# CHECK-NEXT: [4]: MayLoad
173+
# CHECK-NEXT: [5]: MayStore
174+
# CHECK-NEXT: [6]: HasSideEffects (U)
175+
176+
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
177+
# CHECK-NEXT: 2 6 4.00 imulq %rax, %rax
178+
# CHECK-NEXT: 6 17 17.00 * * lock cmpxchgq %rcx, (%rdx)
179+
180+
# CHECK: Resources:
181+
# CHECK-NEXT: [0] - JALU0
182+
# CHECK-NEXT: [1] - JALU1
183+
# CHECK-NEXT: [2] - JDiv
184+
# CHECK-NEXT: [3] - JFPA
185+
# CHECK-NEXT: [4] - JFPM
186+
# CHECK-NEXT: [5] - JFPU0
187+
# CHECK-NEXT: [6] - JFPU1
188+
# CHECK-NEXT: [7] - JLAGU
189+
# CHECK-NEXT: [8] - JMul
190+
# CHECK-NEXT: [9] - JSAGU
191+
# CHECK-NEXT: [10] - JSTC
192+
# CHECK-NEXT: [11] - JVALU0
193+
# CHECK-NEXT: [12] - JVALU1
194+
# CHECK-NEXT: [13] - JVIMUL
195+
196+
# CHECK: Resource pressure per iteration:
197+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
198+
# CHECK-NEXT: 3.00 1.00 - - - - - 17.00 4.00 17.00 - - - -
199+
200+
# CHECK: Resource pressure by instruction:
201+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
202+
# CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - imulq %rax, %rax
203+
# CHECK-NEXT: 3.00 - - - - - - 17.00 - 17.00 - - - - lock cmpxchgq %rcx, (%rdx)
204+
205+
# CHECK: Timeline view:
206+
# CHECK-NEXT: 0123456789
207+
# CHECK-NEXT: Index 0123456789 012
208+
209+
# CHECK: [0,0] DeeeeeeER . . . . imulq %rax, %rax
210+
# CHECK-NEXT: [0,1] .D==eeeeeeeeeeeeeeeeeER lock cmpxchgq %rcx, (%rdx)
211+
212+
# CHECK: Average Wait times (based on the timeline view):
213+
# CHECK-NEXT: [0]: Executions
214+
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
215+
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
216+
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
217+
218+
# CHECK: [0] [1] [2] [3]
219+
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rax
220+
# CHECK-NEXT: 1. 1 3.0 0.0 0.0 lock cmpxchgq %rcx, (%rdx)
221+
222+
# CHECK: [3] Code Region
223+
224+
# CHECK: Iterations: 1
225+
# CHECK-NEXT: Instructions: 2
226+
# CHECK-NEXT: Total Cycles: 23
227+
# CHECK-NEXT: Total uOps: 8
228+
229+
# CHECK: Dispatch Width: 2
230+
# CHECK-NEXT: uOps Per Cycle: 0.35
231+
# CHECK-NEXT: IPC: 0.09
232+
# CHECK-NEXT: Block RThroughput: 17.0
233+
234+
# CHECK: Instruction Info:
235+
# CHECK-NEXT: [1]: #uOps
236+
# CHECK-NEXT: [2]: Latency
237+
# CHECK-NEXT: [3]: RThroughput
238+
# CHECK-NEXT: [4]: MayLoad
239+
# CHECK-NEXT: [5]: MayStore
240+
# CHECK-NEXT: [6]: HasSideEffects (U)
241+
242+
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
243+
# CHECK-NEXT: 2 6 4.00 imulq %rcx, %rcx
244+
# CHECK-NEXT: 6 17 17.00 * * lock cmpxchgq %rcx, (%rdx)
245+
246+
# CHECK: Resources:
247+
# CHECK-NEXT: [0] - JALU0
248+
# CHECK-NEXT: [1] - JALU1
249+
# CHECK-NEXT: [2] - JDiv
250+
# CHECK-NEXT: [3] - JFPA
251+
# CHECK-NEXT: [4] - JFPM
252+
# CHECK-NEXT: [5] - JFPU0
253+
# CHECK-NEXT: [6] - JFPU1
254+
# CHECK-NEXT: [7] - JLAGU
255+
# CHECK-NEXT: [8] - JMul
256+
# CHECK-NEXT: [9] - JSAGU
257+
# CHECK-NEXT: [10] - JSTC
258+
# CHECK-NEXT: [11] - JVALU0
259+
# CHECK-NEXT: [12] - JVALU1
260+
# CHECK-NEXT: [13] - JVIMUL
261+
262+
# CHECK: Resource pressure per iteration:
263+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
264+
# CHECK-NEXT: 3.00 1.00 - - - - - 17.00 4.00 17.00 - - - -
265+
266+
# CHECK: Resource pressure by instruction:
267+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
268+
# CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - imulq %rcx, %rcx
269+
# CHECK-NEXT: 3.00 - - - - - - 17.00 - 17.00 - - - - lock cmpxchgq %rcx, (%rdx)
270+
271+
# CHECK: Timeline view:
272+
# CHECK-NEXT: 0123456789
273+
# CHECK-NEXT: Index 0123456789 012
274+
275+
# CHECK: [0,0] DeeeeeeER . . . . imulq %rcx, %rcx
276+
# CHECK-NEXT: [0,1] .D==eeeeeeeeeeeeeeeeeER lock cmpxchgq %rcx, (%rdx)
277+
278+
# CHECK: Average Wait times (based on the timeline view):
279+
# CHECK-NEXT: [0]: Executions
280+
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
281+
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
282+
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
283+
284+
# CHECK: [0] [1] [2] [3]
285+
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rcx, %rcx
286+
# CHECK-NEXT: 1. 1 3.0 0.0 0.0 lock cmpxchgq %rcx, (%rdx)

0 commit comments

Comments
 (0)