Skip to content

Commit 51107be

Browse files
committed
[X86] Haswell/Broadwell/Skylake DPPS folded instructions use an extra port06 resource
This is an extension to 07151f0, which handled SandyBridge, so we at least model the regression identified in #14640. Confirmed by Agner + uops.info/uica (SkylakeServer also had an incorrect use of Port015 instead of just Port01). I raised #86669 as a proposal for an 'x86 unfold' pass that can unfold these (if we have the free registers), driven by the scheduler model.
1 parent 5c1544c commit 51107be

File tree

12 files changed

+54
-46
lines changed

12 files changed

+54
-46
lines changed

llvm/lib/Target/X86/X86SchedBroadwell.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -324,8 +324,10 @@ defm : BWWriteResPair<WriteFMAX, [BWPort01], 5, [1], 1, 5>; // Fused Multiply
324324
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
325325
defm : X86WriteResPairUnsupported<WriteFMAZ>;
326326
defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product.
327-
defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product.
328-
defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM).
327+
defm : X86WriteRes<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4>;
328+
defm : X86WriteRes<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4>;
329+
defm : X86WriteRes<WriteDPPSLd, [BWPort0,BWPort1,BWPort5,BWPort06,BWPort23], 19, [2,1,1,1,1], 6>;
330+
defm : X86WriteRes<WriteDPPSYLd, [BWPort0,BWPort1,BWPort5,BWPort06,BWPort23], 20, [2,1,1,1,1], 6>;
329331
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
330332
defm : X86WriteRes<WriteFRnd, [BWPort23], 6, [1], 1>; // Floating point rounding.
331333
defm : X86WriteRes<WriteFRndY, [BWPort23], 6, [1], 1>; // Floating point rounding (YMM/ZMM).

llvm/lib/Target/X86/X86SchedHaswell.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -324,8 +324,10 @@ defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
324324
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
325325
defm : HWWriteResPair<WriteFMAZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
326326
defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
327-
defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
328-
defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
327+
defm : X86WriteRes<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4>;
328+
defm : X86WriteRes<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4>;
329+
defm : X86WriteRes<WriteDPPSLd, [HWPort0,HWPort1,HWPort5,HWPort06,HWPort23], 20, [2,1,1,1,1], 6>;
330+
defm : X86WriteRes<WriteDPPSYLd, [HWPort0,HWPort1,HWPort5,HWPort06,HWPort23], 21, [2,1,1,1,1], 6>;
329331
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
330332
defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>;
331333
defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>;

llvm/lib/Target/X86/X86SchedSkylakeClient.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,8 +311,10 @@ defm : SKLWriteResPair<WriteFMAX, [SKLPort01], 4, [1], 1, 6>;
311311
defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>;
312312
defm : X86WriteResPairUnsupported<WriteFMAZ>;
313313
defm : SKLWriteResPair<WriteDPPD, [SKLPort5,SKLPort01], 9, [1,2], 3, 6>; // Floating point double dot product.
314-
defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>;
315-
defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>;
314+
defm : X86WriteRes<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4>;
315+
defm : X86WriteRes<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4>;
316+
defm : X86WriteRes<WriteDPPSLd, [SKLPort5,SKLPort01,SKLPort06,SKLPort23], 19, [1,3,1,1], 6>;
317+
defm : X86WriteRes<WriteDPPSYLd, [SKLPort5,SKLPort01,SKLPort06,SKLPort23], 20, [1,3,1,1], 6>;
316318
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
317319
defm : SKLWriteResPair<WriteFRnd, [SKLPort01], 8, [2], 2, 6>; // Floating point rounding.
318320
defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>;

llvm/lib/Target/X86/X86SchedSkylakeServer.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,8 +311,10 @@ defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>;
311311
defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>;
312312
defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>;
313313
defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
314-
defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>;
315-
defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
314+
defm : X86WriteRes<WriteDPPS, [SKXPort5,SKXPort01], 13, [1,3], 4>;
315+
defm : X86WriteRes<WriteDPPSY, [SKXPort5,SKXPort01], 13, [1,3], 4>;
316+
defm : X86WriteRes<WriteDPPSLd, [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 19, [1,3,1,1], 6>;
317+
defm : X86WriteRes<WriteDPPSYLd, [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 20, [1,3,1,1], 6>;
316318
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
317319
defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>; // Floating point rounding.
318320
defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>;

llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,9 +1189,9 @@ vzeroupper
11891189
# CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2
11901190
# CHECK-NEXT: 4 14 1.00 * vdppd $22, (%rax), %xmm1, %xmm2
11911191
# CHECK-NEXT: 4 14 2.00 vdpps $22, %xmm0, %xmm1, %xmm2
1192-
# CHECK-NEXT: 5 19 2.00 * vdpps $22, (%rax), %xmm1, %xmm2
1192+
# CHECK-NEXT: 6 19 2.00 * vdpps $22, (%rax), %xmm1, %xmm2
11931193
# CHECK-NEXT: 4 14 2.00 vdpps $22, %ymm0, %ymm1, %ymm2
1194-
# CHECK-NEXT: 5 20 2.00 * vdpps $22, (%rax), %ymm1, %ymm2
1194+
# CHECK-NEXT: 6 20 2.00 * vdpps $22, (%rax), %ymm1, %ymm2
11951195
# CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2
11961196
# CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax)
11971197
# CHECK-NEXT: 2 2 1.00 vextractps $1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
17361736

17371737
# CHECK: Resource pressure per iteration:
17381738
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
1739-
# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 424.25 2.25 12.67
1739+
# CHECK-NEXT: - 257.00 216.25 235.25 176.17 176.17 38.00 424.25 3.25 12.67
17401740

17411741
# CHECK: Resource pressure by instruction:
17421742
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1899,9 +1899,9 @@ vzeroupper
18991899
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2
19001900
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vdppd $22, (%rax), %xmm1, %xmm2
19011901
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2
1902-
# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %xmm1, %xmm2
1902+
# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2
19031903
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2
1904-
# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %ymm1, %ymm2
1904+
# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2
19051905
# CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2
19061906
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax)
19071907
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx

llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ roundss $1, (%rax), %xmm2
166166
# CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2
167167
# CHECK-NEXT: 4 14 1.00 * dppd $22, (%rax), %xmm2
168168
# CHECK-NEXT: 4 14 2.00 dpps $22, %xmm0, %xmm2
169-
# CHECK-NEXT: 5 19 2.00 * dpps $22, (%rax), %xmm2
169+
# CHECK-NEXT: 6 19 2.00 * dpps $22, (%rax), %xmm2
170170
# CHECK-NEXT: 2 2 1.00 extractps $1, %xmm0, %ecx
171171
# CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax)
172172
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2
266266

267267
# CHECK: Resource pressure per iteration:
268268
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
269-
# CHECK-NEXT: - - 23.33 22.33 25.67 25.67 5.00 80.33 - 1.67
269+
# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 80.33 0.50 1.67
270270

271271
# CHECK: Resource pressure by instruction:
272272
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -281,7 +281,7 @@ roundss $1, (%rax), %xmm2
281281
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - dppd $22, %xmm0, %xmm2
282282
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - dppd $22, (%rax), %xmm2
283283
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - dpps $22, %xmm0, %xmm2
284-
# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - dpps $22, (%rax), %xmm2
284+
# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2
285285
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx
286286
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax)
287287
# CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2

llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,9 +1189,9 @@ vzeroupper
11891189
# CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2
11901190
# CHECK-NEXT: 4 15 1.00 * vdppd $22, (%rax), %xmm1, %xmm2
11911191
# CHECK-NEXT: 4 14 2.00 vdpps $22, %xmm0, %xmm1, %xmm2
1192-
# CHECK-NEXT: 5 20 2.00 * vdpps $22, (%rax), %xmm1, %xmm2
1192+
# CHECK-NEXT: 6 20 2.00 * vdpps $22, (%rax), %xmm1, %xmm2
11931193
# CHECK-NEXT: 4 14 2.00 vdpps $22, %ymm0, %ymm1, %ymm2
1194-
# CHECK-NEXT: 5 21 2.00 * vdpps $22, (%rax), %ymm1, %ymm2
1194+
# CHECK-NEXT: 6 21 2.00 * vdpps $22, (%rax), %ymm1, %ymm2
11951195
# CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2
11961196
# CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax)
11971197
# CHECK-NEXT: 2 2 1.00 vextractps $1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
17361736

17371737
# CHECK: Resource pressure per iteration:
17381738
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
1739-
# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 427.58 2.25 12.67
1739+
# CHECK-NEXT: - 336.00 215.58 236.58 176.17 176.17 38.00 427.58 3.25 12.67
17401740

17411741
# CHECK: Resource pressure by instruction:
17421742
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1899,9 +1899,9 @@ vzeroupper
18991899
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2
19001900
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vdppd $22, (%rax), %xmm1, %xmm2
19011901
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2
1902-
# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %xmm1, %xmm2
1902+
# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2
19031903
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2
1904-
# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %ymm1, %ymm2
1904+
# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2
19051905
# CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2
19061906
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax)
19071907
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx

llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ roundss $1, (%rax), %xmm2
166166
# CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2
167167
# CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2
168168
# CHECK-NEXT: 4 14 2.00 dpps $22, %xmm0, %xmm2
169-
# CHECK-NEXT: 5 20 2.00 * dpps $22, (%rax), %xmm2
169+
# CHECK-NEXT: 6 20 2.00 * dpps $22, (%rax), %xmm2
170170
# CHECK-NEXT: 2 2 1.00 extractps $1, %xmm0, %ecx
171171
# CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax)
172172
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2
266266

267267
# CHECK: Resource pressure per iteration:
268268
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
269-
# CHECK-NEXT: - - 23.33 22.33 25.67 25.67 5.00 80.33 - 1.67
269+
# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 80.33 0.50 1.67
270270

271271
# CHECK: Resource pressure by instruction:
272272
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -281,7 +281,7 @@ roundss $1, (%rax), %xmm2
281281
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - dppd $22, %xmm0, %xmm2
282282
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - dppd $22, (%rax), %xmm2
283283
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - dpps $22, %xmm0, %xmm2
284-
# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - dpps $22, (%rax), %xmm2
284+
# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2
285285
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx
286286
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax)
287287
# CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,9 +1189,9 @@ vzeroupper
11891189
# CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2
11901190
# CHECK-NEXT: 4 15 1.00 * vdppd $22, (%rax), %xmm1, %xmm2
11911191
# CHECK-NEXT: 4 13 1.50 vdpps $22, %xmm0, %xmm1, %xmm2
1192-
# CHECK-NEXT: 5 19 1.50 * vdpps $22, (%rax), %xmm1, %xmm2
1192+
# CHECK-NEXT: 6 19 1.50 * vdpps $22, (%rax), %xmm1, %xmm2
11931193
# CHECK-NEXT: 4 13 1.50 vdpps $22, %ymm0, %ymm1, %ymm2
1194-
# CHECK-NEXT: 5 20 1.50 * vdpps $22, (%rax), %ymm1, %ymm2
1194+
# CHECK-NEXT: 6 20 1.50 * vdpps $22, (%rax), %ymm1, %ymm2
11951195
# CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2
11961196
# CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax)
11971197
# CHECK-NEXT: 2 3 1.00 vextractps $1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
17361736

17371737
# CHECK: Resource pressure per iteration:
17381738
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
1739-
# CHECK-NEXT: - 126.00 338.58 199.58 173.83 173.83 38.00 326.58 5.25 11.33
1739+
# CHECK-NEXT: - 126.00 339.58 199.58 173.83 173.83 38.00 326.58 6.25 11.33
17401740

17411741
# CHECK: Resource pressure by instruction:
17421742
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1899,9 +1899,9 @@ vzeroupper
18991899
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2
19001900
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vdppd $22, (%rax), %xmm1, %xmm2
19011901
# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2
1902-
# CHECK-NEXT: - - 1.50 1.50 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %xmm1, %xmm2
1902+
# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2
19031903
# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2
1904-
# CHECK-NEXT: - - 1.50 1.50 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %ymm1, %ymm2
1904+
# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2
19051905
# CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2
19061906
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax)
19071907
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ roundss $1, (%rax), %xmm2
166166
# CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2
167167
# CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2
168168
# CHECK-NEXT: 4 13 1.50 dpps $22, %xmm0, %xmm2
169-
# CHECK-NEXT: 5 19 1.50 * dpps $22, (%rax), %xmm2
169+
# CHECK-NEXT: 6 19 1.50 * dpps $22, (%rax), %xmm2
170170
# CHECK-NEXT: 2 3 1.00 extractps $1, %xmm0, %ecx
171171
# CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax)
172172
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2
266266

267267
# CHECK: Resource pressure per iteration:
268268
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
269-
# CHECK-NEXT: - - 37.33 31.33 23.67 23.67 5.00 63.33 - 1.67
269+
# CHECK-NEXT: - - 37.83 31.33 23.67 23.67 5.00 63.33 0.50 1.67
270270

271271
# CHECK: Resource pressure by instruction:
272272
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -281,7 +281,7 @@ roundss $1, (%rax), %xmm2
281281
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - dppd $22, %xmm0, %xmm2
282282
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - dppd $22, (%rax), %xmm2
283283
# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - dpps $22, %xmm0, %xmm2
284-
# CHECK-NEXT: - - 1.50 1.50 0.50 0.50 - 1.00 - - dpps $22, (%rax), %xmm2
284+
# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2
285285
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx
286286
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax)
287287
# CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2

0 commit comments

Comments
 (0)