Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 04acf64

Browse files
author
Balaram Makam
committed
[AArch64] Refine Kryo Machine Model
Summary: Refine floating point SQRT and DIV with accurate latency information.
Reviewers: mcrosier
Subscribers: aemerson, rengolin, llvm-commits
Differential Revision: https://reviews.llvm.org/D29191
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293204 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 68ce932 commit 04acf64

File tree

1 file changed

+40
-22
lines changed

1 file changed

+40
-22
lines changed

lib/Target/AArch64/AArch64SchedKryoDetails.td

Lines changed: 40 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -776,23 +776,29 @@ def KryoWrite_4cyc_X_X_115ln :
776776
}
777777
def : InstRW<[KryoWrite_4cyc_X_X_115ln],
778778
(instregex "FCVTZ(S|U)(v2f64|v4f32|(v2i64|v4i32)(_shift)?)$")>;
779-
def KryoWrite_1cyc_XA_Y_noRSV_43ln :
779+
def KryoWrite_10cyc_XA_Y_noRSV_43ln :
780780
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
781-
let Latency = 1; let NumMicroOps = 3;
781+
let Latency = 10; let NumMicroOps = 3;
782782
}
783-
def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_43ln],
784-
(instrs FDIVDrr, FDIVSrr)>;
785-
def KryoWrite_1cyc_XA_Y_noRSV_121ln :
783+
def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_43ln],
784+
(instrs FDIVSrr)>;
785+
def KryoWrite_14cyc_XA_Y_noRSV_43ln :
786786
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
787-
let Latency = 1; let NumMicroOps = 3;
787+
let Latency = 14; let NumMicroOps = 3;
788788
}
789-
def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_121ln],
789+
def : InstRW<[KryoWrite_14cyc_XA_Y_noRSV_43ln],
790+
(instrs FDIVDrr)>;
791+
def KryoWrite_10cyc_XA_Y_noRSV_121ln :
792+
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
793+
let Latency = 10; let NumMicroOps = 3;
794+
}
795+
def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_121ln],
790796
(instrs FDIVv2f32)>;
791-
def KryoWrite_1cyc_XA_Y_XA_Y_123ln :
797+
def KryoWrite_14cyc_XA_Y_XA_Y_123ln :
792798
SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
793-
let Latency = 1; let NumMicroOps = 4;
799+
let Latency = 14; let NumMicroOps = 4;
794800
}
795-
def : InstRW<[KryoWrite_1cyc_XA_Y_XA_Y_123ln],
801+
def : InstRW<[KryoWrite_14cyc_XA_Y_XA_Y_123ln],
796802
(instrs FDIVv2f64, FDIVv4f32)>;
797803
def KryoWrite_5cyc_X_noRSV_55ln :
798804
SchedWriteRes<[KryoUnitX]> {
@@ -968,24 +974,36 @@ def KryoWrite_2cyc_XY_XY_109ln :
968974
}
969975
def : InstRW<[KryoWrite_2cyc_XY_XY_109ln],
970976
(instregex "FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)")>;
971-
def KryoWrite_1cyc_XA_Y_noRSV_42ln :
977+
def KryoWrite_12cyc_XA_Y_noRSV_42ln :
972978
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
973-
let Latency = 1; let NumMicroOps = 3;
979+
let Latency = 12; let NumMicroOps = 3;
974980
}
975-
def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_42ln],
976-
(instregex "FSQRT(S|D)r")>;
977-
def KryoWrite_1cyc_XA_Y_noRSV_120ln :
981+
def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_42ln],
982+
(instrs FSQRTSr)>;
983+
def KryoWrite_21cyc_XA_Y_noRSV_42ln :
978984
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
979-
let Latency = 1; let NumMicroOps = 3;
985+
let Latency = 21; let NumMicroOps = 3;
986+
}
987+
def : InstRW<[KryoWrite_21cyc_XA_Y_noRSV_42ln],
988+
(instrs FSQRTDr)>;
989+
def KryoWrite_12cyc_XA_Y_noRSV_120ln :
990+
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
991+
let Latency = 12; let NumMicroOps = 3;
992+
}
993+
def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_120ln],
994+
(instrs FSQRTv2f32)>;
995+
def KryoWrite_21cyc_XA_Y_XA_Y_122ln :
996+
SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
997+
let Latency = 21; let NumMicroOps = 4;
980998
}
981-
def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_120ln],
982-
(instregex "FSQRTv2f32")>;
983-
def KryoWrite_1cyc_XA_Y_XA_Y_122ln :
999+
def : InstRW<[KryoWrite_21cyc_XA_Y_XA_Y_122ln],
1000+
(instrs FSQRTv4f32)>;
1001+
def KryoWrite_36cyc_XA_Y_XA_Y_122ln :
9841002
SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
985-
let Latency = 1; let NumMicroOps = 4;
1003+
let Latency = 36; let NumMicroOps = 4;
9861004
}
987-
def : InstRW<[KryoWrite_1cyc_XA_Y_XA_Y_122ln],
988-
(instregex "FSQRT(v2f64|v4f32)")>;
1005+
def : InstRW<[KryoWrite_36cyc_XA_Y_XA_Y_122ln],
1006+
(instrs FSQRTv2f64)>;
9891007
def KryoWrite_1cyc_X_201ln :
9901008
SchedWriteRes<[KryoUnitX]> {
9911009
let Latency = 1; let NumMicroOps = 1;

0 commit comments

Comments
 (0)