Skip to content

Commit 630a6dd

Browse files
committed
[X86] Fix throughput of AVX2/AVX512VL vector extension/truncations
These should only consume 1cy on either of the 2 pipes (only zmm ops should double pump) - matches AMD SoG + uops.info. Noticed while updating costs for #90748.
1 parent c44d528 commit 630a6dd

File tree

4 files changed

+206
-198
lines changed

4 files changed

+206
-198
lines changed

llvm/lib/Target/X86/X86ScheduleZnver4.td

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1631,17 +1631,25 @@ def : InstRW<[Zn4WriteFCmp64], (instregex
16311631
)>;
16321632

16331633
// MOV Instructions
1634-
def Zn4MOVS: SchedWriteRes<[Zn4FPFMisc12]> {
1634+
def Zn4MOVDUPZ: SchedWriteRes<[Zn4FPFMisc12]> {
16351635
let Latency = 2;
16361636
let ReleaseAtCycles = [2];
16371637
let NumMicroOps = 1;
16381638
}
1639+
def : InstRW<[Zn4MOVDUPZ], (instregex
1640+
"(V?)VMOVDDUP(Z|Z128|Z256)(rr|rrk|rrkz)"
1641+
)>;
1642+
1643+
def Zn4MOVS: SchedWriteRes<[Zn4FPFMisc12]> {
1644+
let Latency = 2;
1645+
let ReleaseAtCycles = [1];
1646+
let NumMicroOps = 1;
1647+
}
16391648
def : InstRW<[Zn4MOVS], (instregex
16401649
"(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z128?|Z256?)(rr|rrk|rrkz)",
16411650
"(V?)PMOV(SX|QD|UZ|ZX)(BD|BQ|BW?)(Y|Z128?)(rr|rrk|rrkz)",
16421651
"(V?)PMOV(SX|US|ZX)(DQ|WD|QW|WQ?)(Y|Z128?)(rr|rrk|rrkz)",
1643-
"(V?)VMOVDDUP(Z|Z128|Z256)(rr|rrk|rrkz)",
1644-
"VPMOV(DB|DW|QB|QD|QW|SDB|SDW|SQB|SQD|SQW|SWB|USDB|USDW|USQB|USQD|USWB|WB)(Z128?)(rr|rrk|rrkz)"
1652+
"VPMOV(DB|DW|QB|QD|QW|SDB|SDW|SQB|SQD|SQW|SWB|USDB|USDW|USQB|USQD|USWB|WB)(Z128?|Z256?)(rr|rrk|rrkz)"
16451653
)>;
16461654

16471655
def Zn4MOVSZ: SchedWriteRes<[Zn4FPFMisc12]> {

llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s

Lines changed: 25 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -625,29 +625,29 @@ vpxor (%rax), %ymm1, %ymm2
625625
# CHECK-NEXT: 1 1 0.25 vpminuw %ymm0, %ymm1, %ymm2
626626
# CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %ymm1, %ymm2
627627
# CHECK-NEXT: 1 1 1.00 vpmovmskb %ymm0, %ecx
628-
# CHECK-NEXT: 1 2 1.00 vpmovsxbd %xmm0, %ymm2
628+
# CHECK-NEXT: 1 2 0.50 vpmovsxbd %xmm0, %ymm2
629629
# CHECK-NEXT: 1 11 1.50 * vpmovsxbd (%rax), %ymm2
630-
# CHECK-NEXT: 1 2 1.00 vpmovsxbq %xmm0, %ymm2
630+
# CHECK-NEXT: 1 2 0.50 vpmovsxbq %xmm0, %ymm2
631631
# CHECK-NEXT: 1 11 1.50 * vpmovsxbq (%rax), %ymm2
632-
# CHECK-NEXT: 1 2 1.00 vpmovsxbw %xmm0, %ymm2
632+
# CHECK-NEXT: 1 2 0.50 vpmovsxbw %xmm0, %ymm2
633633
# CHECK-NEXT: 1 11 1.50 * vpmovsxbw (%rax), %ymm2
634-
# CHECK-NEXT: 1 2 1.00 vpmovsxdq %xmm0, %ymm2
634+
# CHECK-NEXT: 1 2 0.50 vpmovsxdq %xmm0, %ymm2
635635
# CHECK-NEXT: 1 11 1.50 * vpmovsxdq (%rax), %ymm2
636-
# CHECK-NEXT: 1 2 1.00 vpmovsxwd %xmm0, %ymm2
636+
# CHECK-NEXT: 1 2 0.50 vpmovsxwd %xmm0, %ymm2
637637
# CHECK-NEXT: 1 11 1.50 * vpmovsxwd (%rax), %ymm2
638-
# CHECK-NEXT: 1 2 1.00 vpmovsxwq %xmm0, %ymm2
638+
# CHECK-NEXT: 1 2 0.50 vpmovsxwq %xmm0, %ymm2
639639
# CHECK-NEXT: 1 11 1.50 * vpmovsxwq (%rax), %ymm2
640-
# CHECK-NEXT: 1 2 1.00 vpmovzxbd %xmm0, %ymm2
640+
# CHECK-NEXT: 1 2 0.50 vpmovzxbd %xmm0, %ymm2
641641
# CHECK-NEXT: 1 11 1.50 * vpmovzxbd (%rax), %ymm2
642-
# CHECK-NEXT: 1 2 1.00 vpmovzxbq %xmm0, %ymm2
642+
# CHECK-NEXT: 1 2 0.50 vpmovzxbq %xmm0, %ymm2
643643
# CHECK-NEXT: 1 11 1.50 * vpmovzxbq (%rax), %ymm2
644-
# CHECK-NEXT: 1 2 1.00 vpmovzxbw %xmm0, %ymm2
644+
# CHECK-NEXT: 1 2 0.50 vpmovzxbw %xmm0, %ymm2
645645
# CHECK-NEXT: 1 11 1.50 * vpmovzxbw (%rax), %ymm2
646-
# CHECK-NEXT: 1 2 1.00 vpmovzxdq %xmm0, %ymm2
646+
# CHECK-NEXT: 1 2 0.50 vpmovzxdq %xmm0, %ymm2
647647
# CHECK-NEXT: 1 11 1.50 * vpmovzxdq (%rax), %ymm2
648-
# CHECK-NEXT: 1 2 1.00 vpmovzxwd %xmm0, %ymm2
648+
# CHECK-NEXT: 1 2 0.50 vpmovzxwd %xmm0, %ymm2
649649
# CHECK-NEXT: 1 11 1.50 * vpmovzxwd (%rax), %ymm2
650-
# CHECK-NEXT: 1 2 1.00 vpmovzxwq %xmm0, %ymm2
650+
# CHECK-NEXT: 1 2 0.50 vpmovzxwq %xmm0, %ymm2
651651
# CHECK-NEXT: 1 11 1.50 * vpmovzxwq (%rax), %ymm2
652652
# CHECK-NEXT: 1 3 0.50 vpmuldq %ymm0, %ymm1, %ymm2
653653
# CHECK-NEXT: 1 10 0.50 * vpmuldq (%rax), %ymm1, %ymm2
@@ -789,7 +789,7 @@ vpxor (%rax), %ymm1, %ymm2
789789

790790
# CHECK: Resource pressure per iteration:
791791
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
792-
# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 93.75 138.75 98.25 36.25 80.50 80.50 29.00 52.33 52.33 52.33 50.67 50.67 50.67 2.50 2.50
792+
# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 93.75 132.75 92.25 36.25 80.50 80.50 29.00 52.33 52.33 52.33 50.67 50.67 50.67 2.50 2.50
793793

794794
# CHECK: Resource pressure by instruction:
795795
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -958,29 +958,29 @@ vpxor (%rax), %ymm1, %ymm2
958958
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminuw %ymm0, %ymm1, %ymm2
959959
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminuw (%rax), %ymm1, %ymm2
960960
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpmovmskb %ymm0, %ecx
961-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbd %xmm0, %ymm2
961+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbd %xmm0, %ymm2
962962
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %ymm2
963-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbq %xmm0, %ymm2
963+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbq %xmm0, %ymm2
964964
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbq (%rax), %ymm2
965-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxbw %xmm0, %ymm2
965+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbw %xmm0, %ymm2
966966
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbw (%rax), %ymm2
967-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxdq %xmm0, %ymm2
967+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxdq %xmm0, %ymm2
968968
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxdq (%rax), %ymm2
969-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxwd %xmm0, %ymm2
969+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxwd %xmm0, %ymm2
970970
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwd (%rax), %ymm2
971-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovsxwq %xmm0, %ymm2
971+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxwq %xmm0, %ymm2
972972
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %ymm2
973-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbd %xmm0, %ymm2
973+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbd %xmm0, %ymm2
974974
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %ymm2
975-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbq %xmm0, %ymm2
975+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbq %xmm0, %ymm2
976976
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbq (%rax), %ymm2
977-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxbw %xmm0, %ymm2
977+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbw %xmm0, %ymm2
978978
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %ymm2
979-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxdq %xmm0, %ymm2
979+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxdq %xmm0, %ymm2
980980
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxdq (%rax), %ymm2
981-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxwd %xmm0, %ymm2
981+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxwd %xmm0, %ymm2
982982
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwd (%rax), %ymm2
983-
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpmovzxwq %xmm0, %ymm2
983+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxwq %xmm0, %ymm2
984984
# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwq (%rax), %ymm2
985985
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmuldq %ymm0, %ymm1, %ymm2
986986
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmuldq (%rax), %ymm1, %ymm2

0 commit comments

Comments
 (0)