Skip to content

Commit 83de8c2

Browse files
committed
[X86] Fix SkylakeClient ports for int-to-double conversions
These are performed on SKLPort01 (+ SKLPort5/SKLPort23 for rr/rm shuffles/loads). Also, clean up some MMX CVT overrides that match the SSE equivalents. Matches uops.info + Agner.
1 parent 12e0e31 commit 83de8c2

File tree

4 files changed

+32
-44
lines changed

4 files changed

+32
-44
lines changed

llvm/lib/Target/X86/X86SchedSkylakeClient.td

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -460,8 +460,10 @@ defm : SKLWriteResPair<WriteCvtI2PSY, [SKLPort01], 4, [1], 1, 7>;
460460
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
461461
defm : X86WriteRes<WriteCvtI2SD, [SKLPort5,SKLPort01], 5, [1,1], 2>;
462462
defm : X86WriteRes<WriteCvtI2SDLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
463-
defm : SKLWriteResPair<WriteCvtI2PD, [SKLPort0,SKLPort5], 5, [1,1], 2, 6>;
464-
defm : SKLWriteResPair<WriteCvtI2PDY, [SKLPort0,SKLPort5], 7, [1,1], 2, 6>;
463+
defm : X86WriteRes<WriteCvtI2PD, [SKLPort5,SKLPort01], 5, [1,1], 2>;
464+
defm : X86WriteRes<WriteCvtI2PDLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
465+
defm : X86WriteRes<WriteCvtI2PDY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
466+
defm : X86WriteRes<WriteCvtI2PDYLd, [SKLPort23,SKLPort01], 11, [1,1], 2>;
465467
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
466468

467469
defm : X86WriteRes<WriteCvtSS2SD, [SKLPort5,SKLPort01], 5, [1,1], 2>;
@@ -925,7 +927,7 @@ def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> {
925927
}
926928
def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
927929

928-
def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> {
930+
def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort0]> {
929931
let Latency = 5;
930932
let NumMicroOps = 2;
931933
let ReleaseAtCycles = [1,1];
@@ -965,7 +967,7 @@ def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm",
965967
"(V?)MOVSLDUPrm",
966968
"(V?)MOVDDUPrm")>;
967969

968-
def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
970+
def SKLWriteResGroup68 : SchedWriteRes<[SKLPort01]> {
969971
let Latency = 6;
970972
let NumMicroOps = 2;
971973
let ReleaseAtCycles = [2];
@@ -1239,13 +1241,6 @@ def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
12391241
}
12401242
def: SchedAlias<WriteADCRMW, SKLWriteResGroup119>;
12411243

1242-
def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> {
1243-
let Latency = 9;
1244-
let NumMicroOps = 2;
1245-
let ReleaseAtCycles = [1,1];
1246-
}
1247-
def: InstRW<[SKLWriteResGroup120], (instrs MMX_CVTPI2PSrm)>;
1248-
12491244
def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> {
12501245
let Latency = 9;
12511246
let NumMicroOps = 2;
@@ -1258,7 +1253,7 @@ def: InstRW<[SKLWriteResGroup121], (instrs PCMPGTQrm,
12581253
VPMOVSXWDYrm,
12591254
VPMOVZXWDYrm)>;
12601255

1261-
def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
1256+
def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort0]> {
12621257
let Latency = 9;
12631258
let NumMicroOps = 2;
12641259
let ReleaseAtCycles = [1,1];
@@ -1290,13 +1285,6 @@ def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
12901285
"ILD_F(16|32|64)m")>;
12911286
def: InstRW<[SKLWriteResGroup133], (instrs VPCMPGTQYrm)>;
12921287

1293-
def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
1294-
let Latency = 10;
1295-
let NumMicroOps = 3;
1296-
let ReleaseAtCycles = [1,1,1];
1297-
}
1298-
def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDrm)>;
1299-
13001288
def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
13011289
let Latency = 10;
13021290
let NumMicroOps = 4;

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,9 +1115,9 @@ vzeroupper
11151115
# CHECK-NEXT: 1 2 1.00 vcomiss %xmm0, %xmm1
11161116
# CHECK-NEXT: 2 7 1.00 * vcomiss (%rax), %xmm1
11171117
# CHECK-NEXT: 2 5 1.00 vcvtdq2pd %xmm0, %xmm2
1118-
# CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %xmm2
1118+
# CHECK-NEXT: 2 10 0.50 * vcvtdq2pd (%rax), %xmm2
11191119
# CHECK-NEXT: 2 7 1.00 vcvtdq2pd %xmm0, %ymm2
1120-
# CHECK-NEXT: 3 13 1.00 * vcvtdq2pd (%rax), %ymm2
1120+
# CHECK-NEXT: 2 11 0.50 * vcvtdq2pd (%rax), %ymm2
11211121
# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %xmm0, %xmm2
11221122
# CHECK-NEXT: 2 10 0.50 * vcvtdq2ps (%rax), %xmm2
11231123
# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm0, %ymm2
@@ -1736,7 +1736,7 @@ vzeroupper
17361736

17371737
# CHECK: Resource pressure per iteration:
17381738
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
1739-
# CHECK-NEXT: - 126.00 340.58 198.58 173.83 173.83 38.00 326.58 6.25 11.33
1739+
# CHECK-NEXT: - 126.00 338.58 200.58 173.83 173.83 38.00 324.58 6.25 11.33
17401740

17411741
# CHECK: Resource pressure by instruction:
17421742
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1824,10 +1824,10 @@ vzeroupper
18241824
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomisd (%rax), %xmm1
18251825
# CHECK-NEXT: - - 1.00 - - - - - - - vcomiss %xmm0, %xmm1
18261826
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomiss (%rax), %xmm1
1827-
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2
1828-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2
1829-
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
1830-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2
1827+
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2
1828+
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2pd (%rax), %xmm2
1829+
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
1830+
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2pd (%rax), %ymm2
18311831
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %xmm0, %xmm2
18321832
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2
18331833
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,10 @@ xorps (%rax), %xmm2
208208
# CHECK-NEXT: 2 9 0.50 * cmpeqss (%rax), %xmm2
209209
# CHECK-NEXT: 1 2 1.00 comiss %xmm0, %xmm1
210210
# CHECK-NEXT: 2 7 1.00 * comiss (%rax), %xmm1
211-
# CHECK-NEXT: 2 6 2.00 cvtpi2ps %mm0, %xmm2
212-
# CHECK-NEXT: 2 9 1.00 * cvtpi2ps (%rax), %xmm2
211+
# CHECK-NEXT: 2 6 1.00 cvtpi2ps %mm0, %xmm2
212+
# CHECK-NEXT: 2 10 0.50 * cvtpi2ps (%rax), %xmm2
213213
# CHECK-NEXT: 2 5 1.00 cvtps2pi %xmm0, %mm2
214-
# CHECK-NEXT: 2 9 0.50 * cvtps2pi (%rax), %mm2
214+
# CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2
215215
# CHECK-NEXT: 2 5 1.00 cvtsi2ss %ecx, %xmm2
216216
# CHECK-NEXT: 3 6 2.00 cvtsi2ss %rcx, %xmm2
217217
# CHECK-NEXT: 2 10 0.50 * cvtsi2ssl (%rax), %xmm2
@@ -221,7 +221,7 @@ xorps (%rax), %xmm2
221221
# CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %ecx
222222
# CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %rcx
223223
# CHECK-NEXT: 2 5 1.00 cvttps2pi %xmm0, %mm2
224-
# CHECK-NEXT: 2 9 0.50 * cvttps2pi (%rax), %mm2
224+
# CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2
225225
# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx
226226
# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx
227227
# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx
@@ -333,7 +333,7 @@ xorps (%rax), %xmm2
333333

334334
# CHECK: Resource pressure per iteration:
335335
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
336-
# CHECK-NEXT: - 24.00 72.83 23.83 32.00 32.00 8.00 31.83 0.50 3.00
336+
# CHECK-NEXT: - 24.00 73.33 23.33 32.00 32.00 8.00 31.83 0.50 3.00
337337

338338
# CHECK: Resource pressure by instruction:
339339
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -351,10 +351,10 @@ xorps (%rax), %xmm2
351351
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpeqss (%rax), %xmm2
352352
# CHECK-NEXT: - - 1.00 - - - - - - - comiss %xmm0, %xmm1
353353
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1
354-
# CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2
355-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2
356-
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pi %xmm0, %mm2
357-
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pi (%rax), %mm2
354+
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtpi2ps %mm0, %xmm2
355+
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2
356+
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtps2pi %xmm0, %mm2
357+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtps2pi (%rax), %mm2
358358
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2ss %ecx, %xmm2
359359
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - cvtsi2ss %rcx, %xmm2
360360
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
@@ -363,8 +363,8 @@ xorps (%rax), %xmm2
363363
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvtss2si %xmm0, %rcx
364364
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %ecx
365365
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %rcx
366-
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttps2pi %xmm0, %mm2
367-
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2pi (%rax), %mm2
366+
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvttps2pi %xmm0, %mm2
367+
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvttps2pi (%rax), %mm2
368368
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvttss2si %xmm0, %ecx
369369
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %rcx
370370
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvttss2si (%rax), %ecx

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ xorpd (%rax), %xmm2
423423
# CHECK-NEXT: 1 2 1.00 comisd %xmm0, %xmm1
424424
# CHECK-NEXT: 2 7 1.00 * comisd (%rax), %xmm1
425425
# CHECK-NEXT: 2 5 1.00 cvtdq2pd %xmm0, %xmm2
426-
# CHECK-NEXT: 3 11 1.00 * cvtdq2pd (%rax), %xmm2
426+
# CHECK-NEXT: 2 10 0.50 * cvtdq2pd (%rax), %xmm2
427427
# CHECK-NEXT: 1 4 0.50 cvtdq2ps %xmm0, %xmm2
428428
# CHECK-NEXT: 2 10 0.50 * cvtdq2ps (%rax), %xmm2
429429
# CHECK-NEXT: 2 5 1.00 cvtpd2dq %xmm0, %xmm2
@@ -433,7 +433,7 @@ xorpd (%rax), %xmm2
433433
# CHECK-NEXT: 2 5 1.00 cvtpd2ps %xmm0, %xmm2
434434
# CHECK-NEXT: 3 11 1.00 * cvtpd2ps (%rax), %xmm2
435435
# CHECK-NEXT: 2 5 1.00 cvtpi2pd %mm0, %xmm2
436-
# CHECK-NEXT: 3 10 1.00 * cvtpi2pd (%rax), %xmm2
436+
# CHECK-NEXT: 2 10 0.50 * cvtpi2pd (%rax), %xmm2
437437
# CHECK-NEXT: 1 4 0.50 cvtps2dq %xmm0, %xmm2
438438
# CHECK-NEXT: 2 10 0.50 * cvtps2dq (%rax), %xmm2
439439
# CHECK-NEXT: 2 5 1.00 cvtps2pd %xmm0, %xmm2
@@ -689,7 +689,7 @@ xorpd (%rax), %xmm2
689689

690690
# CHECK: Resource pressure per iteration:
691691
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
692-
# CHECK-NEXT: - 40.00 113.58 79.58 63.50 63.50 14.00 93.58 2.25 5.00
692+
# CHECK-NEXT: - 40.00 111.58 81.58 63.50 63.50 14.00 91.58 2.25 5.00
693693

694694
# CHECK: Resource pressure by instruction:
695695
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -708,8 +708,8 @@ xorpd (%rax), %xmm2
708708
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpeqsd (%rax), %xmm2
709709
# CHECK-NEXT: - - 1.00 - - - - - - - comisd %xmm0, %xmm1
710710
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comisd (%rax), %xmm1
711-
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtdq2pd %xmm0, %xmm2
712-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2
711+
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtdq2pd %xmm0, %xmm2
712+
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtdq2pd (%rax), %xmm2
713713
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2
714714
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2
715715
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2
@@ -718,8 +718,8 @@ xorpd (%rax), %xmm2
718718
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2pi (%rax), %mm2
719719
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2
720720
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2
721-
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtpi2pd %mm0, %xmm2
722-
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2
721+
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpi2pd %mm0, %xmm2
722+
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2
723723
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2
724724
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2
725725
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pd %xmm0, %xmm2

0 commit comments

Comments
 (0)