Skip to content

Commit 630820b

Browse files
committed
[X86][SLM] Adjust XMM non-PMULLD throughput costs to half rate.
This matches what's reported in the costs table, Agner's tables and the Intel AOM.
1 parent 8ffeb5c commit 630820b

File tree

5 files changed

+57
-57
lines changed

5 files changed

+57
-57
lines changed

llvm/lib/Target/X86/X86ScheduleSLM.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -341,8 +341,8 @@ defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
341341
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
342342
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
343343
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
344-
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
345-
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
344+
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2], 2>;
345+
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2], 2>;
346346
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
347347
// FIXME: The below is closer to correct, but caused some perf regressions.
348348
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;

llvm/test/CodeGen/X86/slow-pmulld.ll

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -537,40 +537,40 @@ define <8 x i32> @test_mul_v8i32_v8i16(<8 x i16> %A) {
537537
define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) {
538538
; SLM32-LABEL: test_mul_v16i32_v16i16:
539539
; SLM32: # %bb.0:
540-
; SLM32-NEXT: movdqa %xmm1, %xmm3
541-
; SLM32-NEXT: movdqa %xmm0, %xmm1
540+
; SLM32-NEXT: movdqa %xmm0, %xmm4
542541
; SLM32-NEXT: movdqa {{.*#+}} xmm0 = [18778,18778,18778,18778,18778,18778,18778,18778]
543-
; SLM32-NEXT: movdqa %xmm1, %xmm2
544-
; SLM32-NEXT: movdqa %xmm3, %xmm4
545-
; SLM32-NEXT: pmullw %xmm0, %xmm1
542+
; SLM32-NEXT: movdqa %xmm1, %xmm3
543+
; SLM32-NEXT: movdqa %xmm4, %xmm2
544+
; SLM32-NEXT: pmullw %xmm0, %xmm4
546545
; SLM32-NEXT: pmulhuw %xmm0, %xmm2
547546
; SLM32-NEXT: pmullw %xmm0, %xmm3
548-
; SLM32-NEXT: pmulhuw %xmm0, %xmm4
549-
; SLM32-NEXT: movdqa %xmm1, %xmm0
550-
; SLM32-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
547+
; SLM32-NEXT: pmulhuw %xmm0, %xmm1
548+
; SLM32-NEXT: movdqa %xmm4, %xmm0
549+
; SLM32-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
551550
; SLM32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
552551
; SLM32-NEXT: movdqa %xmm3, %xmm2
553-
; SLM32-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
554-
; SLM32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
552+
; SLM32-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
553+
; SLM32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
554+
; SLM32-NEXT: movdqa %xmm4, %xmm1
555555
; SLM32-NEXT: retl
556556
;
557557
; SLM64-LABEL: test_mul_v16i32_v16i16:
558558
; SLM64: # %bb.0:
559-
; SLM64-NEXT: movdqa %xmm1, %xmm3
560-
; SLM64-NEXT: movdqa %xmm0, %xmm1
559+
; SLM64-NEXT: movdqa %xmm0, %xmm4
561560
; SLM64-NEXT: movdqa {{.*#+}} xmm0 = [18778,18778,18778,18778,18778,18778,18778,18778]
562-
; SLM64-NEXT: movdqa %xmm1, %xmm2
563-
; SLM64-NEXT: movdqa %xmm3, %xmm4
564-
; SLM64-NEXT: pmullw %xmm0, %xmm1
561+
; SLM64-NEXT: movdqa %xmm1, %xmm3
562+
; SLM64-NEXT: movdqa %xmm4, %xmm2
563+
; SLM64-NEXT: pmullw %xmm0, %xmm4
565564
; SLM64-NEXT: pmulhuw %xmm0, %xmm2
566565
; SLM64-NEXT: pmullw %xmm0, %xmm3
567-
; SLM64-NEXT: pmulhuw %xmm0, %xmm4
568-
; SLM64-NEXT: movdqa %xmm1, %xmm0
569-
; SLM64-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
566+
; SLM64-NEXT: pmulhuw %xmm0, %xmm1
567+
; SLM64-NEXT: movdqa %xmm4, %xmm0
568+
; SLM64-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
570569
; SLM64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
571570
; SLM64-NEXT: movdqa %xmm3, %xmm2
572-
; SLM64-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
573-
; SLM64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
571+
; SLM64-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
572+
; SLM64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
573+
; SLM64-NEXT: movdqa %xmm4, %xmm1
574574
; SLM64-NEXT: retq
575575
;
576576
; SLOW32-LABEL: test_mul_v16i32_v16i16:

llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s

Lines changed: 21 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -563,8 +563,8 @@ xorpd (%rax), %xmm2
563563
# CHECK-NEXT: 1 1 1.00 pextrw $1, %xmm0, %ecx
564564
# CHECK-NEXT: 1 1 1.00 pinsrw $1, %eax, %xmm0
565565
# CHECK-NEXT: 1 4 1.00 * pinsrw $1, (%rax), %xmm0
566-
# CHECK-NEXT: 1 4 1.00 pmaddwd %xmm0, %xmm2
567-
# CHECK-NEXT: 1 7 1.00 * pmaddwd (%rax), %xmm2
566+
# CHECK-NEXT: 2 5 2.00 pmaddwd %xmm0, %xmm2
567+
# CHECK-NEXT: 2 8 2.00 * pmaddwd (%rax), %xmm2
568568
# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2
569569
# CHECK-NEXT: 1 4 1.00 * pmaxsw (%rax), %xmm2
570570
# CHECK-NEXT: 1 1 0.50 pmaxub %xmm0, %xmm2
@@ -574,16 +574,16 @@ xorpd (%rax), %xmm2
574574
# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2
575575
# CHECK-NEXT: 1 4 1.00 * pminub (%rax), %xmm2
576576
# CHECK-NEXT: 1 4 1.00 pmovmskb %xmm0, %ecx
577-
# CHECK-NEXT: 1 4 1.00 pmulhuw %xmm0, %xmm2
578-
# CHECK-NEXT: 1 7 1.00 * pmulhuw (%rax), %xmm2
579-
# CHECK-NEXT: 1 4 1.00 pmulhw %xmm0, %xmm2
580-
# CHECK-NEXT: 1 7 1.00 * pmulhw (%rax), %xmm2
581-
# CHECK-NEXT: 1 4 1.00 pmullw %xmm0, %xmm2
582-
# CHECK-NEXT: 1 7 1.00 * pmullw (%rax), %xmm2
577+
# CHECK-NEXT: 2 5 2.00 pmulhuw %xmm0, %xmm2
578+
# CHECK-NEXT: 2 8 2.00 * pmulhuw (%rax), %xmm2
579+
# CHECK-NEXT: 2 5 2.00 pmulhw %xmm0, %xmm2
580+
# CHECK-NEXT: 2 8 2.00 * pmulhw (%rax), %xmm2
581+
# CHECK-NEXT: 2 5 2.00 pmullw %xmm0, %xmm2
582+
# CHECK-NEXT: 2 8 2.00 * pmullw (%rax), %xmm2
583583
# CHECK-NEXT: 1 4 1.00 pmuludq %mm0, %mm2
584584
# CHECK-NEXT: 1 7 1.00 * pmuludq (%rax), %mm2
585-
# CHECK-NEXT: 1 4 1.00 pmuludq %xmm0, %xmm2
586-
# CHECK-NEXT: 1 7 1.00 * pmuludq (%rax), %xmm2
585+
# CHECK-NEXT: 2 5 2.00 pmuludq %xmm0, %xmm2
586+
# CHECK-NEXT: 2 8 2.00 * pmuludq (%rax), %xmm2
587587
# CHECK-NEXT: 1 1 0.50 por %xmm0, %xmm2
588588
# CHECK-NEXT: 1 4 1.00 * por (%rax), %xmm2
589589
# CHECK-NEXT: 1 4 1.00 psadbw %xmm0, %xmm2
@@ -687,7 +687,7 @@ xorpd (%rax), %xmm2
687687

688688
# CHECK: Resource pressure per iteration:
689689
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
690-
# CHECK-NEXT: - 412.00 12.00 174.50 96.50 3.00 3.00 134.00
690+
# CHECK-NEXT: - 412.00 12.00 184.50 96.50 3.00 3.00 134.00
691691

692692
# CHECK: Resource pressure by instruction:
693693
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -847,8 +847,8 @@ xorpd (%rax), %xmm2
847847
# CHECK-NEXT: - - - 1.00 - - - - pextrw $1, %xmm0, %ecx
848848
# CHECK-NEXT: - - - 1.00 - - - - pinsrw $1, %eax, %xmm0
849849
# CHECK-NEXT: - - - 1.00 - - - 1.00 pinsrw $1, (%rax), %xmm0
850-
# CHECK-NEXT: - - - 1.00 - - - - pmaddwd %xmm0, %xmm2
851-
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmaddwd (%rax), %xmm2
850+
# CHECK-NEXT: - - - 2.00 - - - - pmaddwd %xmm0, %xmm2
851+
# CHECK-NEXT: - - - 2.00 - - - 1.00 pmaddwd (%rax), %xmm2
852852
# CHECK-NEXT: - - - 0.50 0.50 - - - pmaxsw %xmm0, %xmm2
853853
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 pmaxsw (%rax), %xmm2
854854
# CHECK-NEXT: - - - 0.50 0.50 - - - pmaxub %xmm0, %xmm2
@@ -858,16 +858,16 @@ xorpd (%rax), %xmm2
858858
# CHECK-NEXT: - - - 0.50 0.50 - - - pminub %xmm0, %xmm2
859859
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 pminub (%rax), %xmm2
860860
# CHECK-NEXT: - - - - 1.00 - - - pmovmskb %xmm0, %ecx
861-
# CHECK-NEXT: - - - 1.00 - - - - pmulhuw %xmm0, %xmm2
862-
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulhuw (%rax), %xmm2
863-
# CHECK-NEXT: - - - 1.00 - - - - pmulhw %xmm0, %xmm2
864-
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulhw (%rax), %xmm2
865-
# CHECK-NEXT: - - - 1.00 - - - - pmullw %xmm0, %xmm2
866-
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmullw (%rax), %xmm2
861+
# CHECK-NEXT: - - - 2.00 - - - - pmulhuw %xmm0, %xmm2
862+
# CHECK-NEXT: - - - 2.00 - - - 1.00 pmulhuw (%rax), %xmm2
863+
# CHECK-NEXT: - - - 2.00 - - - - pmulhw %xmm0, %xmm2
864+
# CHECK-NEXT: - - - 2.00 - - - 1.00 pmulhw (%rax), %xmm2
865+
# CHECK-NEXT: - - - 2.00 - - - - pmullw %xmm0, %xmm2
866+
# CHECK-NEXT: - - - 2.00 - - - 1.00 pmullw (%rax), %xmm2
867867
# CHECK-NEXT: - - - 1.00 - - - - pmuludq %mm0, %mm2
868868
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmuludq (%rax), %mm2
869-
# CHECK-NEXT: - - - 1.00 - - - - pmuludq %xmm0, %xmm2
870-
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmuludq (%rax), %xmm2
869+
# CHECK-NEXT: - - - 2.00 - - - - pmuludq %xmm0, %xmm2
870+
# CHECK-NEXT: - - - 2.00 - - - 1.00 pmuludq (%rax), %xmm2
871871
# CHECK-NEXT: - - - 0.50 0.50 - - - por %xmm0, %xmm2
872872
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 por (%rax), %xmm2
873873
# CHECK-NEXT: - - - 1.00 - - - - psadbw %xmm0, %xmm2

llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -237,8 +237,8 @@ roundss $1, (%rax), %xmm2
237237
# CHECK-NEXT: 1 4 1.00 * pmovzxwd (%rax), %xmm2
238238
# CHECK-NEXT: 1 1 1.00 pmovzxwq %xmm0, %xmm2
239239
# CHECK-NEXT: 1 4 1.00 * pmovzxwq (%rax), %xmm2
240-
# CHECK-NEXT: 1 4 1.00 pmuldq %xmm0, %xmm2
241-
# CHECK-NEXT: 1 7 1.00 * pmuldq (%rax), %xmm2
240+
# CHECK-NEXT: 2 5 2.00 pmuldq %xmm0, %xmm2
241+
# CHECK-NEXT: 2 8 2.00 * pmuldq (%rax), %xmm2
242242
# CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2
243243
# CHECK-NEXT: 1 7 1.00 * pmulld (%rax), %xmm2
244244
# CHECK-NEXT: 1 1 0.50 ptest %xmm0, %xmm1
@@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
264264

265265
# CHECK: Resource pressure per iteration:
266266
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
267-
# CHECK-NEXT: - - - 88.00 25.00 - - 54.00
267+
# CHECK-NEXT: - - - 90.00 25.00 - - 54.00
268268

269269
# CHECK: Resource pressure by instruction:
270270
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -350,8 +350,8 @@ roundss $1, (%rax), %xmm2
350350
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmovzxwd (%rax), %xmm2
351351
# CHECK-NEXT: - - - 1.00 - - - - pmovzxwq %xmm0, %xmm2
352352
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmovzxwq (%rax), %xmm2
353-
# CHECK-NEXT: - - - 1.00 - - - - pmuldq %xmm0, %xmm2
354-
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmuldq (%rax), %xmm2
353+
# CHECK-NEXT: - - - 2.00 - - - - pmuldq %xmm0, %xmm2
354+
# CHECK-NEXT: - - - 2.00 - - - 1.00 pmuldq (%rax), %xmm2
355355
# CHECK-NEXT: - - - 1.00 - - - - pmulld %xmm0, %xmm2
356356
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulld (%rax), %xmm2
357357
# CHECK-NEXT: - - - 0.50 0.50 - - - ptest %xmm0, %xmm1

llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -148,12 +148,12 @@ psignw (%rax), %xmm2
148148
# CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %xmm2
149149
# CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2
150150
# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %mm2
151-
# CHECK-NEXT: 1 4 1.00 pmaddubsw %xmm0, %xmm2
152-
# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %xmm2
151+
# CHECK-NEXT: 2 5 2.00 pmaddubsw %xmm0, %xmm2
152+
# CHECK-NEXT: 2 8 2.00 * pmaddubsw (%rax), %xmm2
153153
# CHECK-NEXT: 1 4 1.00 pmulhrsw %mm0, %mm2
154154
# CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %mm2
155-
# CHECK-NEXT: 1 4 1.00 pmulhrsw %xmm0, %xmm2
156-
# CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %xmm2
155+
# CHECK-NEXT: 2 5 2.00 pmulhrsw %xmm0, %xmm2
156+
# CHECK-NEXT: 2 8 2.00 * pmulhrsw (%rax), %xmm2
157157
# CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2
158158
# CHECK-NEXT: 1 4 1.00 * pshufb (%rax), %mm2
159159
# CHECK-NEXT: 4 5 5.00 pshufb %xmm0, %xmm2
@@ -183,7 +183,7 @@ psignw (%rax), %xmm2
183183

184184
# CHECK: Resource pressure per iteration:
185185
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
186-
# CHECK-NEXT: - - - 48.00 24.00 - - 32.00
186+
# CHECK-NEXT: - - - 52.00 24.00 - - 32.00
187187

188188
# CHECK: Resource pressure by instruction:
189189
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -229,12 +229,12 @@ psignw (%rax), %xmm2
229229
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubw (%rax), %xmm2
230230
# CHECK-NEXT: - - - 1.00 - - - - pmaddubsw %mm0, %mm2
231231
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmaddubsw (%rax), %mm2
232-
# CHECK-NEXT: - - - 1.00 - - - - pmaddubsw %xmm0, %xmm2
233-
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmaddubsw (%rax), %xmm2
232+
# CHECK-NEXT: - - - 2.00 - - - - pmaddubsw %xmm0, %xmm2
233+
# CHECK-NEXT: - - - 2.00 - - - 1.00 pmaddubsw (%rax), %xmm2
234234
# CHECK-NEXT: - - - 1.00 - - - - pmulhrsw %mm0, %mm2
235235
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulhrsw (%rax), %mm2
236-
# CHECK-NEXT: - - - 1.00 - - - - pmulhrsw %xmm0, %xmm2
237-
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulhrsw (%rax), %xmm2
236+
# CHECK-NEXT: - - - 2.00 - - - - pmulhrsw %xmm0, %xmm2
237+
# CHECK-NEXT: - - - 2.00 - - - 1.00 pmulhrsw (%rax), %xmm2
238238
# CHECK-NEXT: - - - 1.00 - - - - pshufb %mm0, %mm2
239239
# CHECK-NEXT: - - - 1.00 - - - 1.00 pshufb (%rax), %mm2
240240
# CHECK-NEXT: - - - 5.00 - - - - pshufb %xmm0, %xmm2

0 commit comments

Comments
 (0)