Skip to content

Commit 18fd09a

Browse files
[X86SchedSandyBridge] update cost of COPY to 1 cycle from 0
This change matches the COPY cost used by the other scheduling models. It is expected to make the default machine scheduler (pre-RA scheduling) schedule mov instructions around INLINEASM less frequently. Suggested by Craig Topper. Link: #41914. Reviewed By: RKSimon. Differential Revision: https://reviews.llvm.org/D122350
1 parent e1554ac commit 18fd09a

15 files changed

+388
-390
lines changed

llvm/lib/Target/X86/X86SchedSandyBridge.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -111,6 +111,7 @@ def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
111111
def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
112112
def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
113113
def : WriteRes<WriteMove, [SBPort015]>;
114+
def : InstRW<[WriteMove], (instrs COPY)>;
114115
def : WriteRes<WriteZero, []>;
115116
def : WriteRes<WriteVecMaskedGatherWriteback, []> { let Latency = 5; let NumMicroOps = 0; }
116117

llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,8 @@ define fastcc i64 @foo() nounwind {
3535
; CHECK-NEXT: movq X(%rip), %rdi
3636
; CHECK-NEXT: addq %rbx, %r12
3737
; CHECK-NEXT: addq %r8, %rdx
38-
; CHECK-NEXT: bswapq %rdi
3938
; CHECK-NEXT: addq %rbx, %rdx
39+
; CHECK-NEXT: bswapq %rdi
4040
; CHECK-NEXT: leaq (%r15,%r14), %rsi
4141
; CHECK-NEXT: addq %r12, %rsi
4242
; CHECK-NEXT: addq %r11, %rdi
@@ -61,8 +61,8 @@ define fastcc i64 @foo() nounwind {
6161
; CHECK-NEXT: addq %r9, %rbx
6262
; CHECK-NEXT: addq %rbx, %r10
6363
; CHECK-NEXT: addq %rsi, %rdi
64-
; CHECK-NEXT: bswapq %rcx
6564
; CHECK-NEXT: addq %rbx, %rdi
65+
; CHECK-NEXT: bswapq %rcx
6666
; CHECK-NEXT: leaq (%r9,%r12), %rax
6767
; CHECK-NEXT: addq %r10, %rax
6868
; CHECK-NEXT: addq %r15, %rcx
@@ -87,8 +87,8 @@ define fastcc i64 @foo() nounwind {
8787
; CHECK-NEXT: addq %r8, %rbx
8888
; CHECK-NEXT: addq %rbx, %rdx
8989
; CHECK-NEXT: addq %r11, %r14
90-
; CHECK-NEXT: bswapq %rax
9190
; CHECK-NEXT: addq %rbx, %r14
91+
; CHECK-NEXT: bswapq %rax
9292
; CHECK-NEXT: leaq (%r8,%r10), %rbx
9393
; CHECK-NEXT: addq %rdx, %rbx
9494
; CHECK-NEXT: addq %r9, %rax
@@ -113,8 +113,8 @@ define fastcc i64 @foo() nounwind {
113113
; CHECK-NEXT: addq %rsi, %rbx
114114
; CHECK-NEXT: addq %rbx, %rdi
115115
; CHECK-NEXT: addq %r9, %r10
116-
; CHECK-NEXT: bswapq %rax
117116
; CHECK-NEXT: addq %rbx, %r10
117+
; CHECK-NEXT: bswapq %rax
118118
; CHECK-NEXT: leaq (%rsi,%rdx), %rbx
119119
; CHECK-NEXT: addq %rdi, %rbx
120120
; CHECK-NEXT: addq %r8, %rax
@@ -139,8 +139,8 @@ define fastcc i64 @foo() nounwind {
139139
; CHECK-NEXT: addq %r11, %rbx
140140
; CHECK-NEXT: addq %rbx, %r14
141141
; CHECK-NEXT: addq %r8, %r15
142-
; CHECK-NEXT: bswapq %rax
143142
; CHECK-NEXT: addq %rbx, %r15
143+
; CHECK-NEXT: bswapq %rax
144144
; CHECK-NEXT: leaq (%r11,%rdi), %rbx
145145
; CHECK-NEXT: addq %r14, %rbx
146146
; CHECK-NEXT: addq %rsi, %rax
@@ -165,8 +165,8 @@ define fastcc i64 @foo() nounwind {
165165
; CHECK-NEXT: addq %r9, %rbx
166166
; CHECK-NEXT: addq %rbx, %r10
167167
; CHECK-NEXT: addq %rsi, %r12
168-
; CHECK-NEXT: bswapq %rcx
169168
; CHECK-NEXT: addq %rbx, %r12
169+
; CHECK-NEXT: bswapq %rcx
170170
; CHECK-NEXT: leaq (%r9,%r14), %rax
171171
; CHECK-NEXT: addq %r10, %rax
172172
; CHECK-NEXT: addq %r11, %rcx
@@ -191,8 +191,8 @@ define fastcc i64 @foo() nounwind {
191191
; CHECK-NEXT: addq %r8, %rbx
192192
; CHECK-NEXT: addq %rbx, %r15
193193
; CHECK-NEXT: addq %rax, %rcx
194-
; CHECK-NEXT: bswapq %rdx
195194
; CHECK-NEXT: addq %rbx, %rcx
195+
; CHECK-NEXT: bswapq %rdx
196196
; CHECK-NEXT: leaq (%r8,%r10), %rbx
197197
; CHECK-NEXT: addq %r15, %rbx
198198
; CHECK-NEXT: addq %r9, %rdx
@@ -217,9 +217,9 @@ define fastcc i64 @foo() nounwind {
217217
; CHECK-NEXT: addq %rsi, %rdx
218218
; CHECK-NEXT: addq %rdx, %r12
219219
; CHECK-NEXT: addq %rdx, %rcx
220-
; CHECK-NEXT: addq %r15, %rsi
221220
; CHECK-NEXT: movq X(%rip), %rax
222221
; CHECK-NEXT: bswapq %rax
222+
; CHECK-NEXT: addq %r15, %rsi
223223
; CHECK-NEXT: movq %rax, X(%rip)
224224
; CHECK-NEXT: addq %r8, %rax
225225
; CHECK-NEXT: addq %r12, %rsi

llvm/test/CodeGen/X86/fp-load-trunc.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -69,12 +69,12 @@ define <8 x float> @test4(<8 x double>* %p) nounwind {
6969
; CHECK-LABEL: test4:
7070
; CHECK: # %bb.0:
7171
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
72-
; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm1
72+
; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm2
7373
; CHECK-NEXT: cvtpd2ps (%eax), %xmm0
74-
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
75-
; CHECK-NEXT: cvtpd2ps 48(%eax), %xmm2
74+
; CHECK-NEXT: cvtpd2ps 48(%eax), %xmm3
7675
; CHECK-NEXT: cvtpd2ps 32(%eax), %xmm1
77-
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
76+
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
77+
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
7878
; CHECK-NEXT: retl
7979
;
8080
; AVX-LABEL: test4:

llvm/test/CodeGen/X86/fp-trunc.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,11 +61,11 @@ define <8 x float> @test4(<8 x double> %x) nounwind {
6161
; CHECK-LABEL: test4:
6262
; CHECK: # %bb.0:
6363
; CHECK-NEXT: subl $12, %esp
64-
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1
64+
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm3
6565
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
66-
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
6766
; CHECK-NEXT: cvtpd2ps %xmm2, %xmm1
6867
; CHECK-NEXT: cvtpd2ps {{[0-9]+}}(%esp), %xmm2
68+
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
6969
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
7070
; CHECK-NEXT: addl $12, %esp
7171
; CHECK-NEXT: retl

llvm/test/CodeGen/X86/gather-addresses.ll

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -42,9 +42,9 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
4242
; LIN-SSE4-NEXT: cltq
4343
; LIN-SSE4-NEXT: movslq %ecx, %rcx
4444
; LIN-SSE4-NEXT: movslq %edx, %rdx
45+
; LIN-SSE4-NEXT: movslq %esi, %rsi
4546
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
4647
; LIN-SSE4-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
47-
; LIN-SSE4-NEXT: movslq %esi, %rax
4848
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
4949
; LIN-SSE4-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
5050
; LIN-SSE4-NEXT: retq
@@ -81,9 +81,9 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
8181
; WIN-SSE4-NEXT: cltq
8282
; WIN-SSE4-NEXT: movslq %edx, %rdx
8383
; WIN-SSE4-NEXT: movslq %r8d, %r8
84+
; WIN-SSE4-NEXT: movslq %r9d, %r9
8485
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
8586
; WIN-SSE4-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
86-
; WIN-SSE4-NEXT: movslq %r9d, %rax
8787
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
8888
; WIN-SSE4-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
8989
; WIN-SSE4-NEXT: retq
@@ -94,13 +94,13 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
9494
; LIN32-NEXT: pushl %esi
9595
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
9696
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
97-
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
98-
; LIN32-NEXT: movdqa (%edx), %xmm0
99-
; LIN32-NEXT: pand (%ecx), %xmm0
100-
; LIN32-NEXT: pextrd $1, %xmm0, %ecx
101-
; LIN32-NEXT: pextrd $2, %xmm0, %edx
102-
; LIN32-NEXT: pextrd $3, %xmm0, %esi
103-
; LIN32-NEXT: movd %xmm0, %edi
97+
; LIN32-NEXT: movdqa (%ecx), %xmm0
98+
; LIN32-NEXT: pand (%eax), %xmm0
99+
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
100+
; LIN32-NEXT: movd %xmm0, %ecx
101+
; LIN32-NEXT: pextrd $1, %xmm0, %edx
102+
; LIN32-NEXT: pextrd $2, %xmm0, %esi
103+
; LIN32-NEXT: pextrd $3, %xmm0, %edi
104104
; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
105105
; LIN32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
106106
; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
@@ -226,22 +226,22 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
226226
; LIN32-NEXT: pushl %esi
227227
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
228228
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
229-
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
230-
; LIN32-NEXT: movdqa (%edx), %xmm0
231-
; LIN32-NEXT: pand (%ecx), %xmm0
229+
; LIN32-NEXT: movdqa (%ecx), %xmm0
230+
; LIN32-NEXT: pand (%eax), %xmm0
231+
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
232232
; LIN32-NEXT: movd %xmm0, %edx
233233
; LIN32-NEXT: pextrd $1, %xmm0, %esi
234-
; LIN32-NEXT: pextrd $2, %xmm0, %ecx
234+
; LIN32-NEXT: pextrd $2, %xmm0, %eax
235235
; LIN32-NEXT: pextrd $3, %xmm0, %edi
236-
; LIN32-NEXT: andl %eax, %edx
237-
; LIN32-NEXT: andl %eax, %esi
238-
; LIN32-NEXT: andl %eax, %ecx
239-
; LIN32-NEXT: andl %eax, %edi
236+
; LIN32-NEXT: andl %ecx, %edx
237+
; LIN32-NEXT: andl %ecx, %esi
238+
; LIN32-NEXT: andl %ecx, %eax
239+
; LIN32-NEXT: andl %ecx, %edi
240240
; LIN32-NEXT: movd %esi, %xmm1
241241
; LIN32-NEXT: movd %edx, %xmm0
242242
; LIN32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
243243
; LIN32-NEXT: movd %edi, %xmm2
244-
; LIN32-NEXT: movd %ecx, %xmm1
244+
; LIN32-NEXT: movd %eax, %xmm1
245245
; LIN32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
246246
; LIN32-NEXT: popl %esi
247247
; LIN32-NEXT: popl %edi

0 commit comments

Comments
 (0)