Skip to content

Commit 20eced2

Browse files
adibiagio authored and tstellar committed
[X86][SchedModel] Add missing ReadAdvance for some arithmetic ops (PR51318 and PR51322).
This fixes a bug where implicit uses of EFLAGS were not marked as ReadAdvance in the RM/MR variants of ADC/SBB (PR51318). This also fixes the absence of ReadAdvance for the register operand of RMW arithmetic instructions (PR51322). Differential Revision: https://reviews.llvm.org/D107367 (cherry picked from commit 7a1a35a)
1 parent d8667f1 commit 20eced2

File tree

4 files changed

+49
-37
lines changed

4 files changed

+49
-37
lines changed

llvm/lib/Target/X86/X86InstrArithmetic.td

Lines changed: 25 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -708,6 +708,19 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
708708
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
709709
Sched<[sched.Folded, sched.ReadAfterFold]>;
710710

711+
// BinOpRM - Instructions like "adc reg, reg, [mem]".
712+
// There is an implicit register read at the end of the operand sequence.
713+
class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
714+
dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
715+
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
716+
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
717+
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
718+
Sched<[sched.Folded, sched.ReadAfterFold,
719+
// base, scale, index, offset, segment.
720+
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
721+
// implicit register read.
722+
sched.ReadAfterFold]>;
723+
711724
// BinOpRM_F - Instructions like "cmp reg, [mem]".
712725
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
713726
SDNode opnode>
@@ -725,7 +738,7 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
725738
// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
726739
class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
727740
SDNode opnode>
728-
: BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
741+
: BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
729742
[(set typeinfo.RegClass:$dst, EFLAGS,
730743
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
731744
EFLAGS))]>;
@@ -805,15 +818,24 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
805818
SDNode opnode>
806819
: BinOpMR<opcode, mnemonic, typeinfo,
807820
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
808-
(implicit EFLAGS)]>, Sched<[WriteALURMW]>;
821+
(implicit EFLAGS)]>, Sched<[WriteALURMW,
822+
// base, scale, index, offset, segment
823+
ReadDefault, ReadDefault, ReadDefault,
824+
ReadDefault, ReadDefault,
825+
WriteALU.ReadAfterFold]>; // reg
809826

810827
// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
811828
class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
812829
SDNode opnode>
813830
: BinOpMR<opcode, mnemonic, typeinfo,
814831
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
815832
addr:$dst),
816-
(implicit EFLAGS)]>, Sched<[WriteADCRMW]>;
833+
(implicit EFLAGS)]>, Sched<[WriteADCRMW,
834+
// base, scale, index, offset, segment
835+
ReadDefault, ReadDefault, ReadDefault,
836+
ReadDefault, ReadDefault,
837+
WriteALU.ReadAfterFold, // reg
838+
WriteALU.ReadAfterFold]>; // EFLAGS
817839

818840
// BinOpMR_F - Instructions like "cmp [mem], reg".
819841
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,

llvm/test/tools/llvm-mca/X86/BtVer2/adc-sequence-readadvance.s

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,16 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
22
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=4 -timeline < %s | FileCheck %s
33

4-
# FIXME: PR51318
5-
# Missing read-advance for the implicit use of register EFLAGS.
6-
74
adc 4(%rsp), %eax
85

96
# CHECK: Iterations: 4
107
# CHECK-NEXT: Instructions: 4
11-
# CHECK-NEXT: Total Cycles: 19
8+
# CHECK-NEXT: Total Cycles: 10
129
# CHECK-NEXT: Total uOps: 4
1310

1411
# CHECK: Dispatch Width: 2
15-
# CHECK-NEXT: uOps Per Cycle: 0.21
16-
# CHECK-NEXT: IPC: 0.21
12+
# CHECK-NEXT: uOps Per Cycle: 0.40
13+
# CHECK-NEXT: IPC: 0.40
1714
# CHECK-NEXT: Block RThroughput: 1.0
1815

1916
# CHECK: Instruction Info:
@@ -52,13 +49,12 @@ adc 4(%rsp), %eax
5249
# CHECK-NEXT: 1.00 1.00 - - - - - 1.00 - - - - - - adcl 4(%rsp), %eax
5350

5451
# CHECK: Timeline view:
55-
# CHECK-NEXT: 012345678
5652
# CHECK-NEXT: Index 0123456789
5753

58-
# CHECK: [0,0] DeeeeER . . . adcl 4(%rsp), %eax
59-
# CHECK-NEXT: [1,0] D====eeeeER . . adcl 4(%rsp), %eax
60-
# CHECK-NEXT: [2,0] .D=======eeeeER. . adcl 4(%rsp), %eax
61-
# CHECK-NEXT: [3,0] .D===========eeeeER adcl 4(%rsp), %eax
54+
# CHECK: [0,0] DeeeeER . adcl 4(%rsp), %eax
55+
# CHECK-NEXT: [1,0] D=eeeeER . adcl 4(%rsp), %eax
56+
# CHECK-NEXT: [2,0] .D=eeeeER. adcl 4(%rsp), %eax
57+
# CHECK-NEXT: [3,0] .D==eeeeER adcl 4(%rsp), %eax
6258

6359
# CHECK: Average Wait times (based on the timeline view):
6460
# CHECK-NEXT: [0]: Executions
@@ -67,4 +63,4 @@ adc 4(%rsp), %eax
6763
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
6864

6965
# CHECK: [0] [1] [2] [3]
70-
# CHECK-NEXT: 0. 4 6.5 0.3 0.0 adcl 4(%rsp), %eax
66+
# CHECK-NEXT: 0. 4 2.0 0.3 0.0 adcl 4(%rsp), %eax

llvm/test/tools/llvm-mca/X86/BtVer2/rmw-adc-sequence-readadvance.s

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,17 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
22
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline < %s | FileCheck %s
33

4-
# FIXME: PR51322
5-
# Missing read-advance for register EAX.
6-
74
add %eax, %eax
85
adc %eax, 4(%rsp)
96

107
# CHECK: Iterations: 1
118
# CHECK-NEXT: Instructions: 2
12-
# CHECK-NEXT: Total Cycles: 10
9+
# CHECK-NEXT: Total Cycles: 9
1310
# CHECK-NEXT: Total uOps: 2
1411

1512
# CHECK: Dispatch Width: 2
16-
# CHECK-NEXT: uOps Per Cycle: 0.20
17-
# CHECK-NEXT: IPC: 0.20
13+
# CHECK-NEXT: uOps Per Cycle: 0.22
14+
# CHECK-NEXT: IPC: 0.22
1815
# CHECK-NEXT: Block RThroughput: 1.5
1916

2017
# CHECK: Instruction Info:
@@ -55,10 +52,10 @@ adc %eax, 4(%rsp)
5552
# CHECK-NEXT: 2.00 - - - - - - 1.00 - 1.00 - - - - adcl %eax, 4(%rsp)
5653

5754
# CHECK: Timeline view:
58-
# CHECK-NEXT: Index 0123456789
55+
# CHECK-NEXT: Index 012345678
5956

60-
# CHECK: [0,0] DeER . . addl %eax, %eax
61-
# CHECK-NEXT: [0,1] D=eeeeeeER adcl %eax, 4(%rsp)
57+
# CHECK: [0,0] DeER . . addl %eax, %eax
58+
# CHECK-NEXT: [0,1] DeeeeeeER adcl %eax, 4(%rsp)
6259

6360
# CHECK: Average Wait times (based on the timeline view):
6461
# CHECK-NEXT: [0]: Executions
@@ -68,5 +65,5 @@ adc %eax, 4(%rsp)
6865

6966
# CHECK: [0] [1] [2] [3]
7067
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 addl %eax, %eax
71-
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 adcl %eax, 4(%rsp)
72-
# CHECK-NEXT: 1 1.5 0.5 0.0 <total>
68+
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 adcl %eax, 4(%rsp)
69+
# CHECK-NEXT: 1 1.0 0.5 0.0 <total>

llvm/test/tools/llvm-mca/X86/BtVer2/rmw-add-sequence-readadvance.s

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,17 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
22
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline < %s | FileCheck %s
33

4-
# FIXME: PR51322
5-
# Missing read-advance for register EAX.
6-
74
add %eax, %eax
85
add %eax, 4(%rsp)
96

107
# CHECK: Iterations: 1
118
# CHECK-NEXT: Instructions: 2
12-
# CHECK-NEXT: Total Cycles: 10
9+
# CHECK-NEXT: Total Cycles: 9
1310
# CHECK-NEXT: Total uOps: 2
1411

1512
# CHECK: Dispatch Width: 2
16-
# CHECK-NEXT: uOps Per Cycle: 0.20
17-
# CHECK-NEXT: IPC: 0.20
13+
# CHECK-NEXT: uOps Per Cycle: 0.22
14+
# CHECK-NEXT: IPC: 0.22
1815
# CHECK-NEXT: Block RThroughput: 1.0
1916

2017
# CHECK: Instruction Info:
@@ -55,10 +52,10 @@ add %eax, 4(%rsp)
5552
# CHECK-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addl %eax, 4(%rsp)
5653

5754
# CHECK: Timeline view:
58-
# CHECK-NEXT: Index 0123456789
55+
# CHECK-NEXT: Index 012345678
5956

60-
# CHECK: [0,0] DeER . . addl %eax, %eax
61-
# CHECK-NEXT: [0,1] D=eeeeeeER addl %eax, 4(%rsp)
57+
# CHECK: [0,0] DeER . . addl %eax, %eax
58+
# CHECK-NEXT: [0,1] DeeeeeeER addl %eax, 4(%rsp)
6259

6360
# CHECK: Average Wait times (based on the timeline view):
6461
# CHECK-NEXT: [0]: Executions
@@ -68,5 +65,5 @@ add %eax, 4(%rsp)
6865

6966
# CHECK: [0] [1] [2] [3]
7067
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 addl %eax, %eax
71-
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 addl %eax, 4(%rsp)
72-
# CHECK-NEXT: 1 1.5 0.5 0.0 <total>
68+
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 addl %eax, 4(%rsp)
69+
# CHECK-NEXT: 1 1.0 0.5 0.0 <total>

0 commit comments

Comments
 (0)