Skip to content

Commit 9e3e8b5

Browse files
committed
[X86] VPERM2*128 instructions aren't microcoded on znver1
AMD refer to them as microcoded, but not in the same way as LLVM - the uop count and pipe usage is high but predictable. Confirmed with Agner + uops.info.
1 parent 5ab65a6 commit 9e3e8b5

File tree

3 files changed

+25
-14
lines changed

3 files changed

+25
-14
lines changed

llvm/lib/Target/X86/X86ScheduleZnver1.td

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -986,10 +986,21 @@ def : InstRW<[ZnWritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
986986
//-- Move instructions --//
987987

988988
// VPERM2F128 / VPERM2I128.
989-
def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr,
990-
VPERM2I128rr)>;
991-
def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm,
992-
VPERM2I128rm)>;
989+
def ZnWriteVPERM2r : SchedWriteRes<[ZnFPU0, ZnFPU12]> {
990+
let NumMicroOps = 8;
991+
let Latency = 3;
992+
let ReleaseAtCycles = [3,3];
993+
}
994+
def : InstRW<[ZnWriteVPERM2r], (instrs VPERM2F128rr,
995+
VPERM2I128rr)>;
996+
997+
def ZnWriteVPERM2m : SchedWriteRes<[ZnAGU, ZnFPU0, ZnFPU12]> {
998+
let NumMicroOps = 12;
999+
let Latency = 8;
1000+
let ReleaseAtCycles = [1,3,3];
1001+
}
1002+
def : InstRW<[ZnWriteVPERM2m], (instrs VPERM2F128rm,
1003+
VPERM2I128rm)>;
9931004

9941005
def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> {
9951006
let NumMicroOps = 2;

llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1429,8 +1429,8 @@ vzeroupper
14291429
# CHECK-NEXT: 1 100 0.25 * vpcmpistri $1, (%rax), %xmm2
14301430
# CHECK-NEXT: 1 100 0.25 vpcmpistrm $1, %xmm0, %xmm2
14311431
# CHECK-NEXT: 1 100 0.25 * vpcmpistrm $1, (%rax), %xmm2
1432-
# CHECK-NEXT: 1 100 0.25 vperm2f128 $1, %ymm0, %ymm1, %ymm2
1433-
# CHECK-NEXT: 1 100 0.25 * vperm2f128 $1, (%rax), %ymm1, %ymm2
1432+
# CHECK-NEXT: 8 3 3.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
1433+
# CHECK-NEXT: 12 8 3.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
14341434
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
14351435
# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2
14361436
# CHECK-NEXT: 1 1 0.50 vpermilpd %xmm0, %xmm1, %xmm2
@@ -1738,7 +1738,7 @@ vzeroupper
17381738

17391739
# CHECK: Resource pressure per iteration:
17401740
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
1741-
# CHECK-NEXT: 175.00 175.00 - - - - - 144.25 227.25 223.75 315.75 -
1741+
# CHECK-NEXT: 175.50 175.50 - - - - - 150.25 230.25 226.75 315.75 -
17421742

17431743
# CHECK: Resource pressure by instruction:
17441744
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2141,8 +2141,8 @@ vzeroupper
21412141
# CHECK-NEXT: - - - - - - - - - - - - vpcmpistri $1, (%rax), %xmm2
21422142
# CHECK-NEXT: - - - - - - - - - - - - vpcmpistrm $1, %xmm0, %xmm2
21432143
# CHECK-NEXT: - - - - - - - - - - - - vpcmpistrm $1, (%rax), %xmm2
2144-
# CHECK-NEXT: - - - - - - - - - - - - vperm2f128 $1, %ymm0, %ymm1, %ymm2
2145-
# CHECK-NEXT: - - - - - - - - - - - - vperm2f128 $1, (%rax), %ymm1, %ymm2
2144+
# CHECK-NEXT: - - - - - - - 3.00 1.50 1.50 - - vperm2f128 $1, %ymm0, %ymm1, %ymm2
2145+
# CHECK-NEXT: 0.50 0.50 - - - - - 3.00 1.50 1.50 - - vperm2f128 $1, (%rax), %ymm1, %ymm2
21462146
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpermilpd $1, %xmm0, %xmm2
21472147
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpermilpd $1, (%rax), %xmm2
21482148
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpermilpd %xmm0, %xmm1, %xmm2

llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -558,8 +558,8 @@ vpxor (%rax), %ymm1, %ymm2
558558
# CHECK-NEXT: 2 8 1.00 * vpcmpgtq (%rax), %ymm1, %ymm2
559559
# CHECK-NEXT: 2 1 0.67 vpcmpgtw %ymm0, %ymm1, %ymm2
560560
# CHECK-NEXT: 2 8 0.67 * vpcmpgtw (%rax), %ymm1, %ymm2
561-
# CHECK-NEXT: 1 100 0.25 vperm2i128 $1, %ymm0, %ymm1, %ymm2
562-
# CHECK-NEXT: 1 100 0.25 * vperm2i128 $1, (%rax), %ymm1, %ymm2
561+
# CHECK-NEXT: 8 3 3.00 vperm2i128 $1, %ymm0, %ymm1, %ymm2
562+
# CHECK-NEXT: 12 8 3.00 * vperm2i128 $1, (%rax), %ymm1, %ymm2
563563
# CHECK-NEXT: 2 2 1.00 vpermd %ymm0, %ymm1, %ymm2
564564
# CHECK-NEXT: 2 9 1.00 * vpermd (%rax), %ymm1, %ymm2
565565
# CHECK-NEXT: 2 2 1.00 vpermpd $1, %ymm0, %ymm2
@@ -778,7 +778,7 @@ vpxor (%rax), %ymm1, %ymm2
778778

779779
# CHECK: Resource pressure per iteration:
780780
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
781-
# CHECK-NEXT: 66.50 66.50 - - - - - 120.17 239.17 158.00 66.67 -
781+
# CHECK-NEXT: 67.00 67.00 - - - - - 126.17 242.17 161.00 66.67 -
782782

783783
# CHECK: Resource pressure by instruction:
784784
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -880,8 +880,8 @@ vpxor (%rax), %ymm1, %ymm2
880880
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - 1.00 - vpcmpgtq (%rax), %ymm1, %ymm2
881881
# CHECK-NEXT: - - - - - - - 0.67 0.67 - 0.67 - vpcmpgtw %ymm0, %ymm1, %ymm2
882882
# CHECK-NEXT: 0.50 0.50 - - - - - 0.67 0.67 - 0.67 - vpcmpgtw (%rax), %ymm1, %ymm2
883-
# CHECK-NEXT: - - - - - - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2
884-
# CHECK-NEXT: - - - - - - - - - - - - vperm2i128 $1, (%rax), %ymm1, %ymm2
883+
# CHECK-NEXT: - - - - - - - 3.00 1.50 1.50 - - vperm2i128 $1, %ymm0, %ymm1, %ymm2
884+
# CHECK-NEXT: 0.50 0.50 - - - - - 3.00 1.50 1.50 - - vperm2i128 $1, (%rax), %ymm1, %ymm2
885885
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vpermd %ymm0, %ymm1, %ymm2
886886
# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vpermd (%rax), %ymm1, %ymm2
887887
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vpermpd $1, %ymm0, %ymm2

0 commit comments

Comments
 (0)