Skip to content

[X86] Split rr/rm CVT schedules on SNB/HSW/BDW #117494

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 24, 2024

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Nov 24, 2024

The folded load variants almost never require Port5 for length changing conversions (just for SNB ymm cases), and don't have an extra uop for the load.

Confirmed with a mixture of Agner + uops.info comparisons.

The folded load variants almost never require Port5 for length changing conversions (just for SNB ymm cases), and don't have an extra uop for the load.

Confirmed with a mixture of Agner + uops.info comparisons.
@llvmbot
Copy link
Member

llvmbot commented Nov 24, 2024

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

The folded load variants almost never require Port5 for length changing conversions (just for SNB ymm cases), and don't have an extra uop for the load.

Confirmed with a mixture of Agner + uops.info comparisons.


Patch is 48.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117494.diff

15 Files Affected:

  • (modified) llvm/lib/Target/X86/X86SchedBroadwell.td (+11-6)
  • (modified) llvm/lib/Target/X86/X86SchedHaswell.td (+17-17)
  • (modified) llvm/lib/Target/X86/X86SchedSandyBridge.td (+4-3)
  • (modified) llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s (+3-3)
  • (modified) llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s (+7-7)
  • (modified) llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s (+5-5)
  • (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s (+5-5)
  • (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s (+13-13)
  • (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s (+13-13)
  • (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s (+3-3)
  • (modified) llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s (+7-7)
  • (modified) llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s (+1-1)
  • (modified) llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s (+5-5)
  • (modified) llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s (+5-5)
  • (modified) llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s (+3-3)
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 699ca91cd1f8f4..e5b3cc4b6c90e6 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -367,21 +367,26 @@ defm : BWWriteResPair<WriteCvtPD2IY,  [BWPort1,BWPort5], 6, [1,1], 2, 6>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
 defm : X86WriteRes<WriteCvtI2SS,      [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PS,              [BWPort1], 3,   [1], 1>;
+defm : X86WriteRes<WriteCvtI2PSY,             [BWPort1], 3,   [1], 1>;
 defm : X86WriteRes<WriteCvtI2SSLd,   [BWPort1,BWPort23], 9, [1,1], 2>;
-defm : BWWriteResPair<WriteCvtI2PS,   [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtI2PSY,  [BWPort1], 3, [1], 1, 6>;
+defm : X86WriteRes<WriteCvtI2PSLd,   [BWPort1,BWPort23], 8, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PSYLd,  [BWPort1,BWPort23], 9, [1,1], 2>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
 defm : X86WriteRes<WriteCvtI2SD,      [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PD,      [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDY,     [BWPort1,BWPort5], 6, [1,1], 2>;
 defm : X86WriteRes<WriteCvtI2SDLd,   [BWPort1,BWPort23], 9, [1,1], 2>;
-defm : BWWriteResPair<WriteCvtI2PD,   [BWPort1,BWPort5], 4, [1,1], 2, 5>;
-defm : BWWriteResPair<WriteCvtI2PDY,  [BWPort1,BWPort5], 6, [1,1], 2, 5>;
+defm : X86WriteRes<WriteCvtI2PDLd,   [BWPort1,BWPort23], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd,  [BWPort1,BWPort23],11, [1,1], 2>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
 
 defm : X86WriteRes<WriteCvtSS2SD,     [BWPort0,BWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtSS2SDLd,  [BWPort0,BWPort23], 6, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PD,     [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDY,    [BWPort0,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtSS2SDLd,  [BWPort0,BWPort23], 6, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDLd,  [BWPort0,BWPort23], 6, [1,1], 2>;
-defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort0,BWPort5], 4, [1,1], 2, 5>;
+defm : X86WriteRes<WriteCvtPS2PDYLd, [BWPort0,BWPort23], 9, [1,1], 2>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
 defm : BWWriteResPair<WriteCvtSD2SS,  [BWPort1,BWPort5], 4, [1,1], 2, 5>;
 defm : BWWriteResPair<WriteCvtPD2PS,  [BWPort1,BWPort5], 4, [1,1], 2, 5>;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index b820418bb55191..59874be34f5a28 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -364,22 +364,30 @@ defm : HWWriteResPair<WriteCvtPS2IY,  [HWPort1], 3, [1], 1, 7>;
 defm : HWWriteResPair<WriteCvtPS2IZ,  [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
 
 defm : X86WriteRes<WriteCvtI2SD,      [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PD,      [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDY,     [HWPort1,HWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDZ,     [HWPort1,HWPort5], 6, [1,1], 2>; // Unsupported = 1
 defm : X86WriteRes<WriteCvtI2SDLd,   [HWPort1,HWPort23], 9, [1,1], 2>;
-defm : HWWriteResPair<WriteCvtI2PD,   [HWPort1,HWPort5], 4, [1,1], 2, 6>;
-defm : HWWriteResPair<WriteCvtI2PDY,  [HWPort1,HWPort5], 6, [1,1], 2, 6>;
-defm : HWWriteResPair<WriteCvtI2PDZ,  [HWPort1,HWPort5], 6, [1,1], 2, 6>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtI2PDLd,   [HWPort1,HWPort23],10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd,  [HWPort1,HWPort23],12, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDZLd,  [HWPort1,HWPort23],12, [1,1], 2>; // Unsupported = 1
 defm : X86WriteRes<WriteCvtI2SS,      [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PS,              [HWPort1], 3,   [1], 1>;
+defm : X86WriteRes<WriteCvtI2PSY,             [HWPort1], 3,   [1], 1>;
+defm : X86WriteRes<WriteCvtI2PSZ,             [HWPort1], 3,   [1], 1>; // Unsupported = 1
 defm : X86WriteRes<WriteCvtI2SSLd,   [HWPort1,HWPort23], 9, [1,1], 2>;
-defm : HWWriteResPair<WriteCvtI2PS,   [HWPort1], 3, [1], 1, 6>;
-defm : HWWriteResPair<WriteCvtI2PSY,  [HWPort1], 3, [1], 1, 7>;
-defm : HWWriteResPair<WriteCvtI2PSZ,  [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtI2PSLd,   [HWPort1,HWPort23], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PSYLd,  [HWPort1,HWPort23],10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PSZLd,  [HWPort1,HWPort23],10, [1,1], 2>; // Unsupported = 1
 
 defm : X86WriteRes<WriteCvtSS2SD,     [HWPort0,HWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtSS2SDLd,  [HWPort0,HWPort23], 7, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PD,     [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDY,    [HWPort0,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDZ,    [HWPort0,HWPort5], 4, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtSS2SDLd,  [HWPort0,HWPort23], 7, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDLd,  [HWPort0,HWPort23], 6, [1,1], 2>;
-defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort0,HWPort5], 4, [1,1], 2, 6>;
-defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort0,HWPort5], 4, [1,1], 2, 6>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPS2PDYLd, [HWPort0,HWPort23],10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDZLd, [HWPort0,HWPort23],10, [1,1], 2>; // Unsupported = 1
 defm : HWWriteResPair<WriteCvtSD2SS,  [HWPort1,HWPort5], 4, [1,1], 2, 5>;
 defm : HWWriteResPair<WriteCvtPD2PS,  [HWPort1,HWPort5], 4, [1,1], 2, 6>;
 defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
@@ -983,7 +991,6 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
 }
-def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSrm)>;
 def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
 
 def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
@@ -1349,13 +1356,6 @@ def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
 }
 def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>;
 
-def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
-  let Latency = 9;
-  let NumMicroOps = 3;
-  let ReleaseAtCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm)>;
-
 def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
   let Latency = 9;
   let NumMicroOps = 3;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 7be9f51bcd46bd..6939b1227d0a61 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -348,13 +348,14 @@ defm : X86WriteRes<WriteCvtI2PDLd,   [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>
 defm : X86WriteRes<WriteCvtI2PDYLd,  [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
 defm : X86WriteRes<WriteCvtI2PDZLd,  [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1
 
-defm : SBWriteResPair<WriteCvtSS2SD,  [SBPort0], 1, [1], 1, 6>;
+defm : X86WriteRes<WriteCvtSS2SD,     [SBPort0,SBPort5], 1, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PD,     [SBPort0,SBPort5], 2, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDY,    [SBPort0,SBPort5], 2, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDZ,    [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtSS2SDLd,  [SBPort0,SBPort23], 7, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDLd,  [SBPort0,SBPort23], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort23], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort23], 7, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>; // Unsupported = 1
 defm : SBWriteResPair<WriteCvtSD2SS,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
 defm : SBWriteResPair<WriteCvtPD2PS,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
 defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s
index df0053a1dcb9b5..25f79397fa071d 100644
--- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s
@@ -448,7 +448,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvtsi2sd	%rcx, %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   cvtsi2sdl	(%rax), %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   cvtsi2sdq	(%rax), %xmm2
-# CHECK-NEXT:  1      1     1.00                        cvtss2sd	%xmm0, %xmm2
+# CHECK-NEXT:  2      1     1.00                        cvtss2sd	%xmm0, %xmm2
 # CHECK-NEXT:  2      7     1.00    *                   cvtss2sd	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvttpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   cvttpd2dq	(%rax), %xmm2
@@ -687,7 +687,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     172.00 75.83  117.33 17.00  101.83 67.00  67.00
+# CHECK-NEXT:  -     172.00 75.83  117.33 17.00  102.83 67.00  67.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -732,7 +732,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -     cvtsi2sd	%rcx, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   cvtsi2sdl	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   cvtsi2sdq	(%rax), %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     cvtss2sd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -     1.00    -      -     cvtss2sd	%xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   cvtss2sd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -     cvttpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -     1.00   0.50   0.50   cvttpd2dq	(%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 1b196b4355a6d4..028625013a85cc 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1115,9 +1115,9 @@ vzeroupper
 # CHECK-NEXT:  1      3     1.00                        vcomiss	%xmm0, %xmm1
 # CHECK-NEXT:  2      8     1.00    *                   vcomiss	(%rax), %xmm1
 # CHECK-NEXT:  2      4     1.00                        vcvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  3      9     1.00    *                   vcvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      9     1.00    *                   vcvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtdq2pd	%xmm0, %ymm2
-# CHECK-NEXT:  3      11    1.00    *                   vcvtdq2pd	(%rax), %ymm2
+# CHECK-NEXT:  2      11    1.00    *                   vcvtdq2pd	(%rax), %ymm2
 # CHECK-NEXT:  1      3     1.00                        vcvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   vcvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvtdq2ps	%ymm0, %ymm2
@@ -1137,7 +1137,7 @@ vzeroupper
 # CHECK-NEXT:  2      2     1.00                        vcvtps2pd	%xmm0, %xmm2
 # CHECK-NEXT:  2      6     1.00    *                   vcvtps2pd	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        vcvtps2pd	%xmm0, %ymm2
-# CHECK-NEXT:  3      9     1.00    *                   vcvtps2pd	(%rax), %ymm2
+# CHECK-NEXT:  2      9     1.00    *                   vcvtps2pd	(%rax), %ymm2
 # CHECK-NEXT:  2      4     1.00                        vcvtsd2si	%xmm0, %ecx
 # CHECK-NEXT:  2      4     1.00                        vcvtsd2si	%xmm0, %rcx
 # CHECK-NEXT:  3      9     1.00    *                   vcvtsd2si	(%rax), %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     257.00 216.25 247.25 173.17 173.17 38.00  424.25 3.25   12.67
+# CHECK-NEXT:  -     257.00 216.25 247.25 173.17 173.17 38.00  421.25 3.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1825,9 +1825,9 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcomiss	%xmm0, %xmm1
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcomiss	(%rax), %xmm1
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtdq2pd	%xmm0, %ymm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtdq2pd	(%rax), %ymm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtdq2pd	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvtdq2ps	%ymm0, %ymm2
@@ -1847,7 +1847,7 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtps2pd	%xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcvtps2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtps2pd	%xmm0, %ymm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     vcvtps2pd	(%rax), %ymm2
+# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcvtps2pd	(%rax), %ymm2
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     vcvtsd2si	%xmm0, %ecx
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     vcvtsd2si	%xmm0, %rcx
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     vcvtsd2si	(%rax), %ecx
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
index e76d90521afa9c..8851be4679a1e9 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
@@ -423,7 +423,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        comisd	%xmm0, %xmm1
 # CHECK-NEXT:  2      8     1.00    *                   comisd	(%rax), %xmm1
 # CHECK-NEXT:  2      4     1.00                        cvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  3      9     1.00    *                   cvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      9     1.00    *                   cvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        cvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   cvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvtpd2dq	%xmm0, %xmm2
@@ -433,7 +433,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   cvtpd2ps	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvtpi2pd	%mm0, %xmm2
-# CHECK-NEXT:  3      9     1.00    *                   cvtpi2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      9     1.00    *                   cvtpi2pd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        cvtps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   cvtps2dq	(%rax), %xmm2
 # CHECK-NEXT:  2      2     1.00                        cvtps2pd	%xmm0, %xmm2
@@ -689,7 +689,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     78.00  70.75  95.75  63.17  63.17  14.00  119.25 2.25   4.67
+# CHECK-NEXT:  -     78.00  70.75  95.75  63.17  63.17  14.00  117.25 2.25   4.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -709,7 +709,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     comisd	%xmm0, %xmm1
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     comisd	(%rax), %xmm1
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     cvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     cvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     cvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     cvtpd2dq	%xmm0, %xmm2
@@ -719,7 +719,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     cvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     cvtpd2ps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     cvtpi2pd	%mm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     cvtpi2pd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtpi2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     cvtps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtps2dq	(%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     cvtps2pd	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
index 49db25cb0bdfb1..7f07fd56fe60dc 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
@@ -1137,7 +1137,7 @@ vzeroupper
 # CHECK-NEXT:  2      2     1.00                        vcvtps2pd	%xmm0, %xmm2
 # CHECK-NEXT:  2      7     1.00    *                   vcvtps2pd	(%rax), %xmm2
 # CHECK-NEXT:  2      2     1.00                        vcvtps2pd	%xmm0, %ymm2
-# CHECK-NEXT:  2      7     1.00    *                   vcvtps2pd	(%rax), %ymm2
+# CHECK-NEXT:  3      7     1.00    *                   vcvtps2pd	(%rax), %ymm2
 # CHECK-NEXT:  2      5     1.00                        vcvtsd2si	%xmm0, %ecx
 # CHECK-NEXT:  2      5     1.00                        vcvtsd2si	%xmm0, %rcx
 # CHECK-NEXT:  3      10    1.00    *                   vcvtsd2si	(%rax), %ecx
@@ -1152,7 +1152,7 @@ vzeroupper
 # CHECK-NEXT:  3      5     2.00                        vcvtsi2ss	%rcx, %xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvtsi2ssl	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvtsi2ssq	(%rax), %xmm0, %xmm2
-# CHECK-NEXT:  1      1     1.00                        vcvtss2sd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                        vcvtss2sd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  2      7     1.00    *                   vcvtss2sd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  2      5     1.00                        vcvtss2si	%xmm0, %ecx
 # CHECK-NEXT:  2      5     1.00                        vcvtss2si	%xmm0, %rcx
@@ -1734,7 +1734,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]   ...
[truncated]

@RKSimon RKSimon merged commit 6cfaddf into llvm:main Nov 24, 2024
10 checks passed
@RKSimon RKSimon deleted the x86-sched-snb-hsw-cvt branch November 24, 2024 16:19
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants